qExecutor.c 220.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
16 17
#include "tcache.h"
#include "tglobal.h"
H
Haojun Liao 已提交
18
#include "qfill.h"
19
#include "taosmsg.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
23 24
#include "qExecutor.h"
#include "qUtil.h"
25
#include "qresultBuf.h"
H
hjxilinx 已提交
26
#include "query.h"
S
slguan 已提交
27
#include "queryLog.h"
H
Haojun Liao 已提交
28 29
#include "qast.h"
#include "tfile.h"
30 31 32
#include "tlosertree.h"
#include "tscompression.h"
#include "ttime.h"
33 34 35 36 37 38 39 40 41

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
// true when `p` and `s` share at least one set bit (a bitwise test, not an equality test)
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
// true when the TSDB_COL_TAG bit is set in the flag value `f`
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
// true when the forward-direction factor of the query order equals QUERY_ASC_FORWARD_STEP
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

42
/* Scan-phase helpers: test or assign the scanFlag field of a runtime environment. */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
46

H
Haojun Liao 已提交
47
/* get the qinfo struct address from the address of its embedded runtimeEnv member */
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* position of the index-th element counted from (query)->pos, moving by `step` per element */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

/* flip `n` in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC; the expression yields the new value */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

/* zero-valued SDataBlockInfo compound literal */
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

/* byte width / data type of the source column referenced by the colidx-th select expression */
#define GET_COLUMN_BYTES(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
58

59
/* Query execution status flags; each value occupies its own bit so flags can be OR-ed together. */
enum {
  /* set when the query starts to execute */
  QUERY_NOT_COMPLETED = 0x1u,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /*
   * The query is over:
   * 1. used by queries that produce a single result row, e.g. count/sum/first/last/avg etc.
   * 2. also set once all data within the queried time window has been processed
   */
  QUERY_COMPLETED = 0x4u,

  /*
   * Used when the result has not been completely returned to the client;
   * usually the case for interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
79 80

/* Result codes for the timestamp/tag comparison of a join. */
enum {
  TS_JOIN_TS_EQUAL = 0,
  TS_JOIN_TS_NOT_EQUALS = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

86
typedef struct {
87 88 89 90 91 92
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
93 94
} SQueryStatusInfo;

H
Haojun Liao 已提交
95
#if 0
/*
 * Fault-injection allocators, compiled out by default.
 * Each call draws rand() and reports an allocation failure (NULL) when the
 * value modulo 5 is 0 or 1; otherwise it forwards to the real allocator.
 */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();
  return (v % 5 <= 1) ? NULL : malloc(__size);
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  return (v % 5 <= 1) ? NULL : calloc(num, __size);
}

/* route every later calloc/malloc call in this file through the wrappers above */
#define calloc  u_calloc
#define malloc  u_malloc
#endif
H
Haojun Liao 已提交
117

118
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
119 120 121
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

122
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
123
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
124

H
Haojun Liao 已提交
125
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
126

H
Haojun Liao 已提交
127 128 129 130 131 132 133 134
// previous time window may not be of the same size of pQuery->intervalTime
#define GET_NEXT_TIMEWINDOW(_q, tw)                                   \
  do {                                                                \
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR((_q)->order.order); \
    (tw)->skey += ((_q)->slidingTime * factor);                       \
    (tw)->ekey = (tw)->skey + ((_q)->intervalTime - 1);               \
  } while (0)

H
hjxilinx 已提交
135
// todo move to utility
136
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
137

H
hjxilinx 已提交
138
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
139
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
140 141
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
142

143 144 145
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

146
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
147
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo);
148 149
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
150
static void buildTagQueryResult(SQInfo *pQInfo);
151

152
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
153
static int32_t flushFromResultBuf(SQInfo *pQInfo);
154

155
/**
 * Check whether the row at elemPos passes the column filters of the query.
 *
 * A row qualifies only if, for every filtered column, the value is not NULL
 * and at least one of that column's filter elements accepts it.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row position inside each filter column's pData buffer
 * @return true when the row passes all column filters, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = pColFilter->pData + pColFilter->info.bytes * elemPos;

    // a NULL value never qualifies
    if (isNull(pValue, pColFilter->info.type)) {
      return false;
    }

    // look for the first filter element on this column that accepts the value
    int32_t idx = 0;
    while (idx < pColFilter->numOfFilters) {
      SColumnFilterElem *pElem = &pColFilter->pFilters[idx];
      if (pElem->fp(pElem, pValue, pValue)) {
        break;
      }
      ++idx;
    }

    // ran off the end: no filter element accepted the value
    if (idx >= pColFilter->numOfFilters) {
      return false;
    }
  }

  return true;
}

/**
 * Compute the number of result rows produced so far: the largest numOfRes
 * among the output columns that are allowed to decide it.
 *
 * When the query has a main output (hasMainOutput), the ts, tag and tagprj
 * functions cannot decide the output number and are skipped.
 *
 * @param pRuntimeEnv  runtime environment holding the query and its function contexts
 * @return the maximum numOfRes found, 0 if there is none
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    skipAuxiliary = hasMainOutput(pQuery);
  int64_t numOfRows = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    bool    auxiliary =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);

    if (skipAuxiliary && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pInfo == NULL) {
      continue;
    }

    if (numOfRows < pInfo->numOfRes) {
      numOfRows = pInfo->numOfRes;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

209 210 211 212 213 214 215 216 217
/*
 * The number of results needs to be updated after the offset value has been updated.
 * Sets numOfRes of every output column to the given value, except for the
 * ts, tag, tagprj and ts_dummy functions, which are left untouched.
 * The new value must be smaller than the current one (asserted).
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int16_t         functionId = pCtx->functionId;

    bool skipped = (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) ||
                   (functionId == TSDB_FUNC_TAGPRJ) || (functionId == TSDB_FUNC_TS_DUMMY);
    if (skipped) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(pCtx);
    assert(pInfo->numOfRes > numOfRes);
    pInfo->numOfRes = numOfRes;
  }
}

229 230 231 232 233 234 235 236 237
/* Map a group index to its result id: ids start at 200000 and are spaced 10000 apart. */
static int32_t getGroupResultId(int32_t groupIndex) {
  enum { GROUP_RESULT_ID_BASE = 200000, GROUP_RESULT_ID_STEP = 10000 };

  int32_t offset = groupIndex * GROUP_RESULT_ID_STEP;
  return GROUP_RESULT_ID_BASE + offset;
}

/**
 * Tell whether the group-by clause contains a normal (TSDB_COL_NORMAL) column.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return true when at least one group-by column is flagged TSDB_COL_NORMAL;
 *         false for a NULL or empty expression or when no such column exists
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pCol->flag != TSDB_COL_NORMAL) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/**
 * Find the data type of the first normal column in the group-by clause.
 *
 * @param pQuery        query whose colList is searched by column id
 * @param pGroupbyExpr  group-by expression, must not be NULL (asserted)
 * @return the type of the matching entry in pQuery->colList, or
 *         TSDB_DATA_TYPE_NULL when no entry matches
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 is the "no normal column found" placeholder used for the lookup below
  int32_t targetColId = -2;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pCol->flag == TSDB_COL_NORMAL) {
      targetColId = pCol->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == targetColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/**
 * Tell whether the query combines a selectivity function with tag output.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true when at least one output is a TAG_DUMMY/TS_DUMMY function and at
 *         least one other output is a function flagged TSDB_FUNCSTATE_SELECTIVITY
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    tagOutputSeen = false;
  int32_t selectivityCount = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;

    if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
      tagOutputSeen = true;
    } else if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount += 1;
    }
  }

  return (selectivityCount > 0) && tagOutputSeen;
}

303
/* A ts-comp query is one whose first select expression is the TSDB_FUNC_TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFunctionId = pQuery->pSelectExpr[0].base.functionId;
  return firstFunctionId == TSDB_FUNC_TS_COMP;
}
304

305 306 307 308
/**
 * Enforce the LIMIT clause on the rows produced in the current round.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is
 * cut down to the remaining quota and the query is marked QUERY_COMPLETED.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return true when the result was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit configured, or the limit has not been exceeded yet
  if (pQuery->limit.limit <= 0 || pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* Return true when any select expression of the query is a TOP or BOTTOM function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
    ++i;
  }

  return false;
}

H
Haojun Liao 已提交
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
/**
 * Tell whether the query output depends on tag values.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true when the query consists of a single TSDB_FUNC_TS_COMP output (the
 *         ts_comp column requires the tag value for the join filter), or when any
 *         select expression refers to a tag column; false otherwise
 */
static bool hasTagValOutput(SQuery* pQuery) {
  if (pQuery->numOfOutput == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // look for a tag value, by which the results are aggregated
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

355 356 357 358 359 360 361 362
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
363
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
364
  if (pStatis != NULL && !TSDB_COL_IS_TAG(pColIndex->flag)) {
H
Haojun Liao 已提交
365 366
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
367 368
  } else {
    *pColStatis = NULL;
369
  }
370

H
Haojun Liao 已提交
371 372 373 374
  if (TSDB_COL_IS_TAG(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }

375 376 377
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
378

379 380 381 382
  return true;
}

/**
 * Locate the window result registered under the given key, creating a new one
 * during the master scan if the key is not known yet.
 *
 * The result array grows by half its size (at least one slot) when it is full.
 * If that reallocation fails, the function does not return: it jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY, the same out-of-memory
 * convention used elsewhere in this file, instead of writing past the old buffer.
 *
 * @param pRuntimeEnv     runtime environment (query, jump buffer, buffer sizes)
 * @param pWindowResInfo  window result container; curIndex/size/capacity/pResult are updated
 * @param pData           key bytes identifying the window or group
 * @param bytes           length of the key in bytes
 * @param masterscan      only a master scan is allowed to add new windows
 * @return the window result for the key, or NULL when the key is unknown and
 *         this is not a master scan
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
    return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
  }

  if (!masterscan) {  // not master scan, do not add new timewindow
    return NULL;
  }

  // more than the capacity, reallocate the resources
  if (pWindowResInfo->size >= pWindowResInfo->capacity) {
    int64_t oldCap = pWindowResInfo->capacity;
    int64_t newCap = oldCap + oldCap / 2;
    if (newCap <= oldCap) {  // capacity of 0 or 1 would otherwise never grow
      newCap = oldCap + 1;
    }

    char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
    if (t == NULL) {
      // the old buffer is still owned by pWindowResInfo; abort the query instead of overrunning it
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pWindowResInfo->pResult = (SWindowResult *)t;
    memset(&pWindowResInfo->pResult[oldCap], 0, sizeof(SWindowResult) * (size_t)(newCap - oldCap));

    for (int32_t i = (int32_t)oldCap; i < newCap; ++i) {
      createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
    }

    pWindowResInfo->capacity = newCap;
  }

  // add a new result set for a new group
  pWindowResInfo->curIndex = pWindowResInfo->size++;
  taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the time window that covers the timestamp being handled.
 *
 * Starts from the previously stored start key (when no window is active yet) or
 * from the current window, then shifts the window by whole sliding steps until
 * it covers ts. Asserts that the returned window contains ts.
 *
 * @param pWindowResInfo  window result container (curIndex, prevSKey, results)
 * @param ts              timestamp that must fall inside the returned window
 * @param pQuery          query supplying intervalTime, slidingTime, order and range
 * @return the window covering ts; for ascending queries ekey is capped at pQuery->window.ekey
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    int32_t slot = curTimeWindow(pWindowResInfo);
    win = getWindowResult(pWindowResInfo, slot)->window;
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  bool covered = (win.skey <= ts) && (ts <= win.ekey);
  if (!covered) {
    int64_t start = win.skey;

    // window lies after ts: move back by the smallest number of sliding steps that reaches ts
    if (start > ts) {
      start -= ((start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window lies before ts: move forward likewise
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      start += ((ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    win.skey = start;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && win.ekey > pQuery->window.ekey) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);
  return win;
}

/**
 * Assign a row slot in the disk-based result buffer to a window result that
 * does not have one yet.
 *
 * The last page registered for sid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when sid has no page) a new page is requested.
 *
 * @param pWindowRes        window result; pos.pageId / pos.rowId are filled in
 * @param pResultBuf        result buffer providing the pages
 * @param sid               id under which the pages are grouped in the buffer
 * @param numOfRowsPerPage  maximum number of rows stored in one page
 * @return 0 on success or when a slot was already assigned, -1 when no page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    idList = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(idList) != 0) {
    pageId = getLastPageId(idList);
    pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    if (pPage->num >= numOfRowsPerPage) {  // last page is full
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pPage->num++;
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
503
                                       STimeWindow *win, bool masterscan, bool* newWind) {
504 505
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
506

507 508
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
509
  if (pWindowRes == NULL) {
510 511 512
    *newWind = false;

    return masterscan? -1:0;
513
  }
514

515
  *newWind = true;
H
Haojun Liao 已提交
516

517 518 519
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
520
    if (ret != TSDB_CODE_SUCCESS) {
521 522 523
      return -1;
    }
  }
524

525 526
  // set time window for current result
  pWindowRes->window = *win;
527

H
Haojun Liao 已提交
528
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
529 530 531 532 533 534 535 536
  return TSDB_CODE_SUCCESS;
}

/* Return the status of the window result stored in the given slot; the slot must be in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pResult = &pWindowResInfo->pResult[slot];
  return &pResult->status;
}

H
Haojun Liao 已提交
537
/**
 * Count how many rows, starting at pos and moving in scan order, belong to the
 * window that ends at ekey.
 *
 * @param numOfRows  number of timestamps in pData
 * @param searchFn   search routine used to locate ekey in the timestamp array
 * @param ekey       end key of the window
 * @param pos        position of the first row of the window in pData
 * @param order      TSDB_ORDER_ASC scans towards higher positions, anything else towards lower ones
 * @param pData      timestamp column of the block
 * @return number of rows to step forward; asserted to be positive
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;
  int32_t hit = -1;

  if (order == TSDB_ORDER_ASC) {
    // search only the rows from pos onwards; the returned index is relative to pos
    hit = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (hit >= 0) {
      steps = hit;
      if (pData[hit + pos] == ekey) {  // the row holding ekey itself is part of the window
        steps += 1;
      }
    }
  } else {
    // search the rows [0, pos]; the returned index is absolute
    hit = searchFn((char *)pData, pos + 1, ekey, order);
    if (hit >= 0) {
      steps = pos - hit;
      if (pData[hit] == ekey) {
        steps += 1;
      }
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
568
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
569
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
570
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
571
    return pWindowResInfo->size;
572
  }
573

574
  // no qualified results exist, abort check
575 576
  int32_t numOfClosed = 0;
  
577
  if (pWindowResInfo->size == 0) {
578
    return pWindowResInfo->size;
579
  }
580

581
  // query completed
H
hjxilinx 已提交
582 583
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
584
    closeAllTimeWindow(pWindowResInfo);
585

586 587 588 589
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
590
    int64_t skey = TSKEY_INITIAL_VAL;
591

592 593 594
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
595
        numOfClosed += 1;
596 597
        continue;
      }
598

599 600 601 602 603 604 605 606
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
607

608
    // all windows are closed, set the last one to be the skey
609
    if (skey == TSKEY_INITIAL_VAL) {
610 611 612 613 614
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
615

616
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
617

618 619
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
620
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
621 622
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
623
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
624
    } else {
625
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
626
             numOfClosed);
627 628
    }
  }
629 630 631 632 633 634 635
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
636
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
637
  return numOfClosed;
638 639 640
}

/**
 * Count the rows of the current block, starting at startPos and following the
 * scan order, that fall into the time window ending at ekey.
 *
 * @param pQuery          query (order and current table)
 * @param pDataBlockInfo  block info (rows and time range of the block)
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        first row of the window inside the block, must be in [0, rows)
 * @param ekey            end key of the time window
 * @param searchFn        search routine passed on to getForwardStepsInBlock
 * @param updateLastKey   when true, pQuery->current->lastKey is set to the key of the last
 *                        counted row (or the block boundary) plus one step in scan direction
 * @return number of rows in the window; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);
  int32_t numOfRows = -1;

  STableQueryInfo* pTable = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    // does the window end before the block does?
    bool endsInsideBlock = (ekey < pDataBlockInfo->window.ekey);

    if (endsInsideBlock) {
      numOfRows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    } else {
      numOfRows = pDataBlockInfo->rows - startPos;
    }

    if (updateLastKey) {  // update the last key
      TSKEY last = endsInsideBlock ? pPrimaryColumn[startPos + (numOfRows - 1)] : pDataBlockInfo->window.ekey;
      pTable->lastKey = last + step;
    }
  } else {  // desc
    bool endsInsideBlock = (ekey > pDataBlockInfo->window.skey);

    if (endsInsideBlock) {
      numOfRows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    } else {
      numOfRows = startPos + 1;
    }

    if (updateLastKey) {  // update the last key
      TSKEY last = endsInsideBlock ? pPrimaryColumn[startPos - (numOfRows - 1)] : pDataBlockInfo->window.skey;
      pTable->lastKey = last + step;
    }
  }

  assert(numOfRows > 0);
  return numOfRows;
}

/**
 * Apply every output function of the query to a run of rows in the current block.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param pStatus      status of the target window
 * @param pWin         target time window; its skey becomes nStartQueryTimestamp of each context
 * @param offset       position of the first row (in scan order) inside the block
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    pFuncCtx->nStartQueryTimestamp = pWin->skey;
    pFuncCtx->size = forwardStep;

    // startOffset is always the lowest row position of the run, whatever the scan order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pFuncCtx->startOffset = offset;
    } else {
      pFuncCtx->startOffset = offset - (forwardStep - 1);
    }

    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pFuncCtx->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pFuncCtx->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, functionId)) {
      aAggs[functionId].xFunction(pFuncCtx);
    }
  }
}

/**
 * Apply every output function of the query to the single row at the given offset.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param pStatus      status of the target window
 * @param pWin         target time window; its skey becomes nStartQueryTimestamp of each context
 * @param offset       row position passed to each function's xFunctionF
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pCtx[i];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pFuncCtx, functionId)) {
      continue;
    }

    aAggs[functionId].xFunctionF(pFuncCtx, offset);
  }
}

H
Haojun Liao 已提交
726 727
/**
 * Advance pNext to the next time window and find the row of the current block
 * at which that window starts.
 *
 * @param pRuntimeEnv     runtime environment (query)
 * @param pNext           in: current window; out: the next window, moved further by whole
 *                        sliding steps if it would not cover the row found
 * @param pDataBlockInfo  block info (rows and time range of the block)
 * @param primaryKeys     timestamp column of the block
 * @param searchFn        search routine used to locate the start key in primaryKeys
 * @param prevPosition    last row of the previous window, or -1 when unknown
 * @return start position of the next window inside the block, or -1 when the next
 *         window lies outside the current block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  bool outsideBlock = ascending ? (pNext->skey > pDataBlockInfo->window.ekey)
                                : (pNext->ekey < pDataBlockInfo->window.skey);
  if (outsideBlock) {
    return -1;
  }

  // key to look up in the block: the leading edge of the window, bounded by the query start key
  TSKEY startKey = -1;
  if (ascending) {
    startKey = pNext->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    startKey = pNext->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  int32_t startPos = 0;
  if (pQuery->slidingTime == pQuery->intervalTime && prevPosition != -1) {
    // tumbling time window query, a special case of sliding time window query:
    // the next window starts right after the last row of the previous one
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  TSKEY firstKey = primaryKeys[startPos];
  if (ascending) {
    if (firstKey > pNext->ekey) {
      pNext->ekey += ((firstKey - pNext->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
      pNext->skey = pNext->ekey - pQuery->intervalTime + 1;
    }
  } else if (firstKey < pNext->skey) {
    pNext->skey -= ((pNext->skey - firstKey + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNext->ekey = pNext->skey + pQuery->intervalTime - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
779
/**
 * Return the trailing edge of the window in scan direction, bounded by the end
 * key of the query range: ekey capped from above for ascending queries, skey
 * capped from below otherwise.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
/*
 * Return the data buffer of the column with the given id, or NULL when the
 * data block has no such column. The columns are scanned linearly.
 */
//todo binary search
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }
    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
811
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
812 813 814
  if (pDataBlock == NULL) {
    return NULL;
  }
815

H
Haojun Liao 已提交
816
  char *dataBlock = NULL;
H
Haojun Liao 已提交
817
  SQuery *pQuery = pRuntimeEnv->pQuery;
818
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
819

820
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
821
  if (functionId == TSDB_FUNC_ARITHM) {
822
    sas->pArithExpr = &pQuery->pSelectExpr[col];
823

824 825 826 827 828 829
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
830

831 832 833 834
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
835

H
Haojun Liao 已提交
836
    if (sas->data == NULL) {
H
Haojun Liao 已提交
837
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
838 839 840
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

841
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
842
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
843
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
844
      SColumnInfo *pColMsg = &pQuery->colList[i];
845

846 847 848 849 850 851 852 853
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
854

855
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
856
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
857
    }
858

859
  } else {  // other type of query function
860
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
861
    if (TSDB_COL_IS_TAG(pCol->flag)) {
862 863
      dataBlock = NULL;
    } else {
H
Haojun Liao 已提交
864 865 866 867 868
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
869 870
    }
  }
871

872 873 874 875
  return dataBlock;
}

/**
H
Haojun Liao 已提交
876
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
877 878
 * @param pRuntimeEnv
 * @param forwardStep
879
 * @param tsCols
880 881 882 883 884
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
885
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
886 887
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
888
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
889 890
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

891 892
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
893
  if (pDataBlock != NULL) {
894
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
895
    tsCols = (TSKEY *)(pColInfo->pData);
896
  }
897

898
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
899
  if (sasArray == NULL) {
H
Haojun Liao 已提交
900
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
901 902
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
903

904
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
905
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
906
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
907
  }
908

909
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
910 911
  if (QUERY_IS_INTERVAL_QUERY(pQuery)/* && tsCols != NULL*/) {
    TSKEY ts = TSKEY_INITIAL_VAL;
912

H
Haojun Liao 已提交
913 914 915 916 917 918 919 920
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
921
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
922 923
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
dengyihao's avatar
dengyihao 已提交
924
      tfree(sasArray);
H
hjxilinx 已提交
925
      return;
926
    }
927

H
Haojun Liao 已提交
928 929 930
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

931
    if (hasTimeWindow) {
H
Haojun Liao 已提交
932
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
933
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
934

935
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
H
Haojun Liao 已提交
936
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
937
    }
938

939 940
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
941

942
    while (1) {
H
Haojun Liao 已提交
943 944
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
945 946 947
      if (startPos < 0) {
        break;
      }
948

949
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
950
      hasTimeWindow = false;
H
Haojun Liao 已提交
951 952
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
953 954
        break;
      }
955

956 957 958 959 960
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
961
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
962

H
Haojun Liao 已提交
963
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
964
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
965
    }
966

967 968 969 970 971 972 973
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
974
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
975
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
976 977 978 979 980
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
981

982 983 984 985
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
986

987 988
    tfree(sasArray[i].data);
  }
989

990 991 992 993 994 995 996
  tfree(sasArray);
}

/*
 * Select (creating it on first use) the result buffer of the group identified by the
 * group-by column value stored at pData, and make it the current output buffer.
 *
 * @param pData  pointer to the group-by column value of the current row
 * @param type   data type of that value
 * @param bytes  width of that value, used as key length
 * @return TSDB_CODE_SUCCESS, or -1 when the value is NULL, the window result cannot be
 *         obtained, or a new result buffer cannot be added
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // ignore the null value
  if (isNull(pData, type)) {
    return -1;
  }

  int32_t groupResultId = 1;

  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  // integer image of the group value; stays -1 for every type not listed below
  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->window.skey = groupKey;
  pGroupRes->window.ekey = groupKey;

  // not assign result buffer yet, add new result buffer
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1033
/*
 * Locate the data of the first group-by column that is a normal (non-tag) column and is
 * present in the data block.
 *
 * @param pQuery      query whose pGroupbyExpr and colList are consulted
 * @param type        [out] data type of the group-by column, taken from pQuery->colList
 * @param bytes       [out] width of the group-by column, taken from pQuery->colList
 * @param pDataBlock  column data of the current block
 * @return the pData pointer of the matching column in pDataBlock, or NULL when none of the
 *         non-tag group-by columns is found in the block
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, k);

    // the flag is a bitmask: test the tag bit instead of comparing the whole value, so a
    // tag column that carries additional flag bits is still skipped
    if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      continue;
    }

    int16_t colIndex = -1;
    int32_t colId = pColIndex->colId;

    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pQuery->colList[i].colId == colId) {
        colIndex = i;
        break;
      }
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     *  the colIndex is acquired from the first meter of all qualified meters in this vnode during query prepare
     * stage, the remain meter may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding meter schema is reinforced.
     */
    int32_t numOfCols = taosArrayGetSize(pDataBlock);

    for (int32_t i = 0; i < numOfCols; ++i) {
      SColumnInfoData *p = taosArrayGet(pDataBlock, i);
      if (pColIndex->colId == p->info.colId) {
        return p->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the first context's input buffer with the current element
 * of the timestamp buffer (pRuntimeEnv->pTSBuf).
 *
 * @return TS_JOIN_TAG_NOT_EQUALS when the tag of the first context differs from the element tag,
 *         TS_JOIN_TS_NOT_EQUALS  when the row timestamp has not reached the element timestamp
 *                                in scan direction,
 *         TS_JOIN_TS_EQUAL       otherwise (a row that is already past the element trips an assert)
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (pCtx[0].tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // the row lags behind the timestamp buffer in scan direction: not a match yet
  bool lagging = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (lagging) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row is already past the current element, which is not expected to happen
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function `functionId` has to be evaluated for the current scan.
 *
 * The checks are applied in this order:
 *   1. TSDB_FUNC_TS always runs.
 *   2. A completed result, TSDB_FUNC_TAG_DUMMY and TSDB_FUNC_TS_DUMMY never run.
 *   3. first / first_dst run only for an ascending query.
 *   4. last / last_dst run only when param[0] of the context equals the query order.
 *   5. Everything else runs unless this is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  if (pResInfo->complete) {
    return false;
  }

  if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, only the functions handled above need to be executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1140 1141
/*
 * Apply all output functions of the query to one data block, one row at a time.
 *
 * Each row is first checked against the timestamp buffer (when pRuntimeEnv->pTSBuf is set)
 * and against the column filters; rows that pass are fed either to every time window that
 * covers them (interval query) or to the functions directly, after the group result buffer
 * has been selected when grouping by a normal column.
 *
 * On return the lastKey of the current table (pQuery->current) is set from the last visited
 * row, or from the block window when the first column is not a timestamp column.
 *
 * If the per-expression support array cannot be allocated, the query result is finalized
 * and control leaves through longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY).
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pTableInfo = pQuery->current;

  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
  bool groupByNormalCol = pRuntimeEnv->groupbyNormalCol;

  // the first column serves as timestamp list only when it really is a timestamp column
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByNormalCol) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[k];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // interval window query
      // decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);

      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // visit the following windows that still cover this timestamp, then restore the
      // current window index
      STimeWindow nextWin = win;
      int32_t     savedIndex = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (!hasTimeWindow) {
          continue;
        }

        pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByNormalCol) {
        char *val = groupbyColumnData + bytes * offset;

        // null data, too many state code
        if (setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes) != TSDB_CODE_SUCCESS) {
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else {
    TSKEY blockEnd = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    pTableInfo->lastKey = blockEnd + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a pointer array (allocated in getDataBlock)
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1305
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1306
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1307 1308 1309
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1310
  
H
Haojun Liao 已提交
1311
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1312
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1313
  } else {
1314
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1315
  }
1316

1317
  // update the lastkey of current table
1318
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1319
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1320

1321
  // interval query with limit applied
1322
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1323
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1324 1325 1326
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1327

1328 1329 1330 1331
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1332

1333 1334 1335
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1336

1337 1338 1339
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1340
    }
1341
  }
1342

1343
  return numOfRes;
1344 1345
}

H
Haojun Liao 已提交
1346
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1347 1348 1349 1350 1351 1352
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1353
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1354
  pCtx->aInputElemBuf = inputData;
1355

1356
  if (tpField != NULL) {
H
Haojun Liao 已提交
1357
    pCtx->preAggVals.isSet  = true;
1358 1359
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1360 1361 1362
  } else {
    pCtx->preAggVals.isSet = false;
  }
1363

H
Haojun Liao 已提交
1364 1365
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1366 1367 1368
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1369

1370 1371
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1372
    pCtx->ptsList = tsCol;
1373
  }
1374

1375 1376 1377 1378 1379
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1380
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1381
    /*
H
Haojun Liao 已提交
1382
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1383 1384 1385 1386 1387 1388 1389 1390 1391 1392
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1393

1394 1395
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1396 1397 1398 1399 1400 1401
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1415
  }
1416

1417 1418 1419 1420 1421 1422
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1423
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1424 1425 1426
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1427
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1428 1429 1430 1431 1432 1433
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1434 1435 1436
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1437
  if (isSelectivityWithTagsQuery(pQuery)) {
1438
    int32_t num = 0;
1439
    int16_t tagLen = 0;
1440 1441
    
    SQLFunctionCtx *p = NULL;
1442
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1443

1444
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1445
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1446
      
1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1460 1461 1462 1463 1464 1465 1466
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
      tfree(pTagCtx); 
    }
1467 1468 1469
  }
}

H
Haojun Liao 已提交
1470 1471
/*
 * Hand each result info its slice of the shared intermediate buffer.
 *
 * The slices are laid out back to back in `buf`, in output order; the slice of output i
 * is pQuery->pSelectExpr[i].interBytes bytes wide.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char *cursor = buf;

  int32_t i = 0;
  while (i < pQuery->numOfOutput) {
    int32_t width = pQuery->pSelectExpr[i].interBytes;

    setResultInfoBuf(&pResultInfo[i], width, isStableQuery, cursor);
    cursor += width;

    ++i;
  }
}

1480
/*
 * Allocate and initialize the per-output execution state of a query.
 *
 * Allocates pRuntimeEnv->resultInfo (one SResultInfo per output followed by
 * pRuntimeEnv->interBufSize bytes of intermediate buffer) and pRuntimeEnv->pCtx (one
 * SQLFunctionCtx per output), then fills every context with input/output type information,
 * the function parameters and, for top/bottom/diff, the order information.
 *
 * @param pRuntimeEnv  runtime environment to set up; pQuery must already be assigned
 * @param order        order stored in param[2] of top/bottom/diff contexts
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails
 *         (both buffers are released in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // input type/width come from the tag schema, the table name pseudo column or the column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // take the string of the j-th argument; type and length above belong to arg[j] as well
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions require the first output expression to be the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // byte offset of output i inside a result row: running sum of the preceding output widths
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffers start right behind the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release the resources held by a query runtime environment: the time window
 * info, the per-output function contexts (parameters, tag variant, tag context
 * list), the result info, the fill info, the result buffer, both tsdb query
 * handles and the ts buffer.
 *
 * Nothing is done when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  // resultInfo is released together with the function contexts only
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[idx];

      int32_t k = 0;
      while (k < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[k]);
        k += 1;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  // the destroy functions return the value to store back into the owning field
  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1613
// True when the code field of the given query info equals TSDB_CODE_TSC_QUERY_CANCELLED.
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1614

H
Haojun Liao 已提交
1615
// Mark the query as cancelled by storing TSDB_CODE_TSC_QUERY_CANCELLED in its code field.
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1616

H
Haojun Liao 已提交
1617 1618 1619
/*
 * Tell whether the query is a "fixed output" query.
 *
 * Interval queries never are. Top/bottom queries and group-by-normal-column
 * queries always are. Otherwise the query is fixed output as soon as one of
 * its select expressions (timestamp helpers excluded) is not flagged as
 * multi-output in aAggs.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      fid = pFunc->functionId;

    // ignore the ts_comp function: a leading projection of the primary timestamp column
    bool leadingTsProjection = (idx == 0) && (fid == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    if (leadingTsProjection) {
      continue;
    }

    // timestamp helper functions do not take part in the decision
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[fid].nStatus)) {
      return true;
    }
  }

  return false;
}

1649
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1650
static bool isPointInterpoQuery(SQuery *pQuery) {
1651
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1652
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1653
    if (functionID == TSDB_FUNC_INTERP) {
1654 1655 1656
      return true;
    }
  }
1657

1658 1659 1660 1661
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1662
static bool isSumAvgRateQuery(SQuery *pQuery) {
1663
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1664
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1665 1666 1667
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1668

1669 1670 1671 1672 1673
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1674

1675 1676 1677
  return false;
}

H
hjxilinx 已提交
1678
// Return true when at least one select expression uses TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    idx += 1;
  }

  return false;
}

H
hjxilinx 已提交
1689
/*
 * Decide whether a second scan in the opposite direction is required.
 *
 * - ts / ts_dummy / tag expressions are ignored;
 * - a first / first_dst expression requires it when the query is not ascending;
 * - the first last / last_dst expression encountered settles the answer: a
 *   reverse scan is needed when the order stored in its first argument differs
 *   from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      fid = pFunc->functionId;

    bool ignorable = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (ignorable) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t order = pFunc->arg->argValue.i64;
      return order != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1708 1709 1710

/*
 * Return true when every select expression is one of: tag projection, tid_tag,
 * or count() applied to the column whose id is TSDB_TBNAME_COLUMN_INDEX.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];
    int32_t    fid = pExpr->base.functionId;

    bool tagFunc = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG);
    bool countTbname = (fid == TSDB_FUNC_COUNT) && (pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);

    if (!(tagFunc || countTbname)) {
      return false;
    }
  }

  return true;
}

1723 1724
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1725
/*
 * Compute the time window for the given key and store it in win.
 *
 * win->skey is obtained from taosGetIntervalStartTimestamp(); win->ekey is
 * skey + intervalTime - 1, except when keyFirst is so close to INT64_MAX that
 * adding one interval to it would overflow, in which case ekey is INT64_MAX.
 *
 * key must lie within [keyFirst, keyLast] and the sliding time must not exceed
 * the interval time (both asserted).
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit,
                                            pQuery->precision);

  // regular case: the window is exactly one interval long
  if (keyFirst <= (INT64_MAX - pQuery->intervalTime)) {
    win->ekey = win->skey + pQuery->intervalTime - 1;
    return;
  }

  /*
   * if keyFirst > INT64_MAX - pQuery->intervalTime, the query duration between
   * keyFirst and keyLast must be less than one interval. Therefore, no need to adjust the query ranges.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);
  win->ekey = INT64_MAX;
}

/*
 * Set pQuery->checkBuffer.
 *
 * It is 0 for top/bottom queries and for group-by-normal-column queries.
 * Otherwise the select expressions are walked (ts / ts_dummy skipped) up to and
 * including the first one that is not multi-output; checkBuffer is 1 only when
 * the last inspected expression is multi-output.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1768
bool colIdCheck(SQuery *pQuery) {
1769 1770
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1771
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1772
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1773 1774 1775
      return false;
    }
  }
1776
  
1777 1778 1779 1780 1781 1782
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
/*
 * Return true when every select expression, apart from ts / ts_dummy / tag /
 * tag_dummy ones, uses either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool tsFunc = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY);
    bool tagFunc = (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAG_DUMMY);
    if (tsFunc || tagFunc) {
      continue;
    }

    bool matches = (fid == functId) || (fid == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

// Return true when first / first_dst are the only functions in the query (see onlyOneQueryType).
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// Return true when last / last_dst are the only functions in the query (see onlyOneQueryType).
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816
// todo refactor, add iterator
// Swap win.skey and win.ekey of every table query info in every table group of the query.
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pGroup = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pGroup);

    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo* pInfo = (STableQueryInfo*) taosArrayGetP(pGroup, k);
      SWAP(pInfo->win.skey, pInfo->win.ekey, TSKEY);
    }
  }
}

H
Haojun Liao 已提交
1817 1818 1819
/*
 * Adjust the scan order (and exchange the query time range accordingly) for
 * query types that require a particular scan direction:
 *
 *  - last_row query:                 descending order;
 *  - point interpolation, no interval: ascending order;
 *  - no interval, only first():      ascending order;
 *  - no interval, only last():       descending order;
 *  - interval query on a super table with only first()/last(): same as above.
 *
 * For the non-interval first/last cases the time window of every table query
 * info is exchanged as well (doExchangeTimeWindow).
 *
 * pQInfo       the query info; its runtimeEnv.pQuery is modified in place
 * stableQuery  true when the query runs on a super table
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // in case of point-interpolation query, use asc order scan
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // The log calls below print pQInfo directly. GET_QINFO_ADDR() must only be applied to a
  // pointer to the runtimeEnv member of SQInfo; applying it to pQuery yields a meaningless address.

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    // for a descending scan the window starts at the larger key and ends at the smaller one
    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->intervalTime == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Number of result pages to start with:
 *  - 128 for a group-by-normal-column query;
 *  - max(number of tables, 16) for an interval query;
 *  - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minIntervalPages = 16;

  int32_t pages = 0;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = MAX(numOfTables, minIntervalPages);
  } else {      // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(pages > 0);
  return pages;
}

H
Haojun Liao 已提交
1919 1920
/*
 * Number of result rows that fit into one internal buffer page: the page size
 * minus the tFilePage header, divided by the row size scaled with
 * GET_ROW_PARAM_FOR_MULTIOUTPUT().
 */
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t bytesPerRow = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);

  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / bytesPerRow;
}

H
Haojun Liao 已提交
1924
// True for every data type except TSDB_DATA_TYPE_BINARY and TSDB_DATA_TYPE_NCHAR.
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
1925

H
Haojun Liao 已提交
1926 1927 1928 1929
/*
 * Use the pre-computed block statistics to decide whether the data block has
 * to be loaded.
 *
 * Returns true (load the block) when:
 *  - no statistics are available, or the query has neither filter columns nor
 *    a top/bottom function;
 *  - a filter column has no statistics entry or is of binary/nchar type;
 *  - one of the filters of a column accepts the [min, max] range of the block.
 * For a top/bottom query that passed the filter loop, the answer is whatever
 * topbot_datablock_filter() returns for the first top/bottom expression.
 * Returns false otherwise.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && !pRuntimeEnv->topBotQuery) {
    return true;
  }

  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[f];

    // locate the statistics entry of the filtered column
    SDataStatis *pColStatis = NULL;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pColFilter->info.colId) {
        pColStatis = &pDataStatis[c];
        break;
      }
    }

    // no statistics data
    if (pColStatis == NULL) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pColFilter->info.type)) {
      return true;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      continue;
    }

    char *pMin = (char *)&pColStatis->min;
    char *pMax = (char *)&pColStatis->max;

    // for float columns the min/max fields are read as double and handed to the filter as float
    float minval, maxval;
    if (pColFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      minval = *(double *)(&pColStatis->min);
      maxval = *(double *)(&pColStatis->max);

      pMin = (char *)&minval;
      pMax = (char *)&maxval;
    }

    for (int32_t i = 0; i < pColFilter->numOfFilters; ++i) {
      if (pColFilter->pFilters[i].fp(&pColFilter->pFilters[i], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t fid = pQuery->pSelectExpr[i].base.functionId;
      if (fid != TSDB_FUNC_TOP && fid != TSDB_FUNC_BOTTOM) {
        continue;
      }

      return topbot_datablock_filter(&pCtx[i], fid, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037
// True when _p lies strictly between _w.skey and _w.ekey (both bounds excluded).
#define PT_IN_WINDOW(_p, _w)  ((_p) > (_w).skey && (_p) < (_w).ekey)

/*
 * Check whether a boundary of some query time window falls strictly inside the
 * time range of the data block, i.e. whether the block is split by a window
 * boundary.
 *
 * The first window is the one aligned on the block start key (ascending scan)
 * or on the block end key (descending scan); the following windows are produced
 * by GET_NEXT_TIMEWINDOW() until they lie entirely beyond the block.
 *
 * Returns true as soon as a window start or end key is inside the block range,
 * false when no window boundary is.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, sk, ek, &w);

    if (PT_IN_WINDOW(w.ekey, pBlockInfo->window)) {
      return true;
    }

    while(1) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);

      // Stop only once the window starts after the END of the block. Comparing against the
      // block start key would end the loop before a window starting inside the block is
      // examined, which mirrors the "w.ekey < block skey" test of the descending branch.
      if (w.skey > pBlockInfo->window.ekey) {
        break;
      }

      if (PT_IN_WINDOW(w.skey, pBlockInfo->window) || PT_IN_WINDOW(w.ekey, pBlockInfo->window)) {
        return true;
      }
    }
  } else {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, sk, ek, &w);
    if (PT_IN_WINDOW(w.skey, pBlockInfo->window)) {
      return true;
    }

    while(1) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);

      // the window lies entirely before the block: nothing left to check
      if (w.ekey < pBlockInfo->window.skey) {
        break;
      }

      if (PT_IN_WINDOW(w.skey, pBlockInfo->window) || PT_IN_WINDOW(w.ekey, pBlockInfo->window)) {
        return true;
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2038
/*
 * Determine how much of the current data block is required by the query and
 * load exactly that.
 *
 * The whole block is required when the query has filter columns, when a ts
 * buffer is attached, or when the block is split by an interval window
 * boundary. Otherwise the requirement is the union of what the dataReqFunc of
 * every select expression reports.
 *
 * pRuntimeEnv   runtime environment; its summary counters are updated
 * pQueryHandle  tsdb query handle positioned on the block
 * pBlockInfo    description (time range, rows) of the block
 * pStatis       out: block statistics, when they were loaded
 * pDataBlock    out: block data, when it was loaded
 *
 * Returns BLK_DATA_DISCARD when the block statistics show that the block can be
 * dropped, TSDB_CODE_SUCCESS in all other cases.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t status = 0;
  // pTSBuf is a pointer: test it against NULL instead of using an ordered comparison with 0
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, loading current
    // data block is not needed.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      status = BLK_DATA_ALL_NEEDED;
    }

    if (status != BLK_DATA_ALL_NEEDED) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;

        status |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if ((status & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if (status == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (status == BLK_DATA_STATIS_NEEDED) {
    // A failure to load the statistics is deliberately not propagated; the data block is
    // loaded below whenever *pStatis is still NULL afterwards.
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      //        return DISK_DATA_LOAD_FAILED;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(status == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;

    // The result is ignored on purpose: needToLoadDataBlock() answers "load" when it is
    // handed NULL statistics.
    // NOTE(review): this presumes *pStatis is NULL after a failed call - confirm.
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
    }

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      return BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2110
/*
 * Binary search for key in an array of TSKEY values (the search assumes the
 * keys are stored in ascending order).
 *
 * pValue  the key array, reinterpreted as TSKEY *
 * num     number of keys in the array
 * key     the key to look for
 * order   TSDB_ORDER_ASC or TSDB_ORDER_DESC (asserted)
 *
 * TSDB_ORDER_DESC: returns the position of key, or of the closest smaller key;
 *                  -1 when every key in the array is greater than key.
 * TSDB_ORDER_ASC:  returns the position of key, or of the closest greater key;
 *                  -1 when every key in the array is smaller than key.
 * Returns -1 as well when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;

      int32_t span = hi - lo + 1;
      int32_t mid = (span >> 1) + lo;

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) return lo;
    if (key == keys[hi]) return hi;

    if (key > keys[hi]) {
      hi = hi + 1;
      return (hi >= num) ? -1 : hi;
    }

    int32_t span = hi - lo + 1;
    int32_t mid = (span >> 1) + lo;

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194
/*
 * Grow every output column buffer of the query so that it can hold `capacity`
 * rows, and point each function context at the start of its buffer.
 * Nothing is done when capacity is smaller than the current capacity.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;
    assert(bytes > 0 && capacity > 0);

    // each buffer is a tFilePage header followed by capacity * bytes of payload
    char *pNew = realloc(pQuery->sdata[col], bytes * capacity + sizeof(tFilePage));
    if (pNew != NULL) {
      pQuery->sdata[col] = (tFilePage *)pNew;
    } else {  // todo handle the oom
      // NOTE(review): with assertions disabled the old, smaller buffer stays in use while the
      // capacity below is still raised - confirm how callers expect OOM to be reported.
      assert(0);
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2201 2202 2203
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to
 * accommodate the results of the current block.
 *
 * Only applies to queries that are neither interval, group-by-normal-column nor
 * fixed-output queries. When the free room in the output buffers is smaller
 * than the number of rows of the block, every output column buffer is enlarged
 * by the missing amount, the new tail is zeroed and the output positions of the
 * function contexts are refreshed.
 *
 * On allocation failure the function does not return: it jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY. Buffers enlarged before the
 * failure stay valid and the recorded capacity is left unchanged.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = pRec->capacity - pRec->rows;
  int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    char *tmp = realloc(pQuery->sdata[i], bytes * newSize + sizeof(tFilePage));
    if (tmp == NULL) {
      // assert(0) alone vanishes under NDEBUG and execution would go on to write through the
      // NULL pointer; abort the query through the same jump buffer the scan loop uses.
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear the part of the buffer that holds no result yet
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (newSize - pRec->rows) * bytes);
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2240 2241
/*
 * Iterate over the data blocks delivered by the tsdb query handle of the
 * current scan (primary handle for the master scan, secondary handle otherwise)
 * and apply the query functions to each of them.
 *
 * The loop stops when the handle has no more blocks, or when the query status
 * reports a full result buffer or completion. A cancelled query leaves through
 * longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED).
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    bool ascending = QUERY_IS_ASC_QUERY(pQuery);

    // todo extract methods
    // first block of an interval query: initialize the start of the time window sequence
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
      STimeWindow     firstWin = TSWINDOW_INITIALIZER;
      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

      if (ascending) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &firstWin);
        pWindowResInfo->startTime = firstWin.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &firstWin);
        pWindowResInfo->startTime = pQuery->window.skey;
      }

      pWindowResInfo->prevSKey = firstWin.skey;
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    if (loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis, &pDataBlock) == BLK_DATA_DISCARD) {
      // the block is skipped: move the last key one step past it
      pQuery->current->lastKey = (ascending ? blockInfo.window.ekey : blockInfo.window.skey) + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (ascending) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2326
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2327
  tVariantDestroy(tag);
2328

2329
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2330
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2331 2332 2333
    assert(val != NULL);
    
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2334
  } else {
2335
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2336 2337 2338 2339
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
H
hjxilinx 已提交
2340 2341
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2342
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2343 2344 2345 2346
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2347
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2348
    } else {
H
Haojun Liao 已提交
2349 2350 2351 2352 2353
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2354
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2355
    }
2356
  }
2357 2358
}

2359
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
2360
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2361
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
2362

H
[td-90]  
Haojun Liao 已提交
2363 2364 2365
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);
H
Haojun Liao 已提交
2366 2367 2368 2369 2370 2371 2372 2373 2374 2375

    // todo refactor extract function.
    int16_t type = -1, bytes = -1;
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
        type = pQuery->tagColList[i].type;
        bytes = pQuery->tagColList[i].bytes;
      }
    }

2376
    doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2377 2378
  } else {
    // set tag value, by which the results are aggregated.
2379
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
H
Haojun Liao 已提交
2380
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];
H
[td-90]  
Haojun Liao 已提交
2381
  
2382
      // ts_comp column required the tag value for join filter
H
Haojun Liao 已提交
2383
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
2384 2385
        continue;
      }
2386

2387
      // todo use tag column index to optimize performance
2388
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
H
Haojun Liao 已提交
2389
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
2390
    }
2391

2392
    // set the join tag for first column
H
[td-90]  
Haojun Liao 已提交
2393
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
H
Haojun Liao 已提交
2394
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
2395 2396
        pRuntimeEnv->pTSBuf != NULL) {
      assert(pFuncMsg->numOfParams == 1);
H
Haojun Liao 已提交
2397 2398 2399 2400 2401 2402 2403 2404 2405 2406

      // todo refactor
      int16_t type = -1, bytes = -1;
      for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
        if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
          type = pQuery->tagColList[i].type;
          bytes = pQuery->tagColList[i].bytes;
        }
      }

2407
      doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2408
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
B
Bomin Zhang 已提交
2409
          pRuntimeEnv->pCtx[0].tag.i64Key)
2410 2411 2412 2413 2414 2415 2416
    }
  }
}

/*
 * Feed one window result into the merge contexts and run the per-function
 * distributed merge.
 *
 * mergeFlag == false starts a new output row: every context's output pointer
 * is advanced by one element, its result info is reset and the function is
 * re-initialised. mergeFlag == true merges into the current output row.
 *
 * For TSDB_FUNC_TAG / TSDB_FUNC_TAG_DUMMY the tag variant of the context is
 * rebuilt from the input element. distMergeFunc is then invoked for every
 * function except TSDB_FUNC_TAG_DUMMY.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;
  SQuery         *pQuery = pRuntimeEnv->pQuery;

  // pass 1: prepare every context before any merge function runs
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pCtxList[k];
    int32_t         funcId = pQuery->pSelectExpr[k].base.functionId;

    if (!mergeFlag) {
      pCtx->aOutputBuf = pCtx->aOutputBuf + pCtx->outputBytes;
      pCtx->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pCtx->resultInfo);
      aAggs[funcId].init(pCtx);
    }

    pCtx->hasNull = true;
    pCtx->nStartQueryTimestamp = timestamp;
    pCtx->aInputElemBuf = getPosInResultPage(pRuntimeEnv, k, pWindowRes);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // tag columns: the tag value is taken from the input element itself
    tVariantDestroy(&pCtx->tag);

    int32_t outType = pCtx->outputType;
    bool    isVarLenType = (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR);
    if (isVarLenType) {
      tVariantCreateFromBinary(&pCtx->tag, varDataVal(pCtx->aInputElemBuf), varDataLen(pCtx->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pCtx->tag, pCtx->aInputElemBuf, pCtx->inputBytes, pCtx->inputType);
    }
  }

  // pass 2: run the merge function of every output except the dummy tag
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t funcId = pQuery->pSelectExpr[k].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtxList[k]);
    }
  }
}

2456
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2525
/*
 * Debug helper: dump `numOfRows` rows of the intermediate result to stdout,
 * one row per line and one tab-terminated field per output column.
 *
 * pdata[i] is the page of output column i; row j of that column is read at
 * byte offset pSelectExpr[i].bytes * j. BINARY cells are decoded by
 * printBinaryData(); column types not listed in the switch print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      char      *cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY:
          printBinaryData(pExpr->base.functionId, cell, pExpr->type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2560 2561 2562
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2563 2564 2565 2566 2567
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2568

2569 2570
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2571

2572 2573
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2574

2575 2576 2577 2578
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2579

2580 2581 2582 2583
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2584

H
hjxilinx 已提交
2585
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2586
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2587

2588 2589
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2590

H
hjxilinx 已提交
2591
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2592
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2593

2594 2595
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2596

2597 2598 2599
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2600

2601 2602 2603
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2604
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one of them
 * yields result pages or all groups have been visited. groupIndex is left
 * pointing one past the last group that was merged.
 *
 * Returns -1 when mergeIntoGroupResultImpl() reports a failure, otherwise
 * TSDB_CODE_SUCCESS.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startMs = taosGetTimestampMs();
  int32_t totalGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  while (pQInfo->groupIndex < totalGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // stop at the first group that produced something to return
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, totalGroups, taosGetTimestampMs() - startMs);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results from the disk-based result
 * buffer into pQuery->sdata and account for it in pQuery->rec.rows.
 *
 * When every page batch of the current group has been consumed
 * (offset == numOfGroupResultPages), the next group is merged first. If no
 * group is left and nothing was produced, pQInfo->tableIndex is moved to the
 * number of tables to flag the query as completed, and nothing is copied.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  bool groupConsumed = (pQInfo->offset == pQInfo->numOfGroupResultPages);
  if (groupConsumed) {
    pQInfo->numOfGroupResultPages = 0;

    // current group is done, produce the results of the next one
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    int32_t totalGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
    if (pQInfo->groupIndex == totalGroups && pQInfo->numOfGroupResultPages == 0) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv    *pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t groupId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pageIds = getDataBufPagesIdList(pResultBuf, pQInfo->offset + groupId);
  int32_t numOfPages = taosArrayGetSize(pageIds);

  // total number of rows held by the pages of this batch
  int32_t totalRows = 0;
  for (int32_t p = 0; p < numOfPages; ++p) {
    int32_t   *pPageId = taosArrayGet(pageIds, p);
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, *pPageId);
    totalRows += pPage->num;
  }

  // column-wise copy of every page, appended after the rows already copied
  int32_t rowOffset = 0;
  for (int32_t p = 0; p < numOfPages; ++p) {
    int32_t   *pPageId = taosArrayGet(pageIds, p);
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, *pPageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t colBytes = pRuntimeEnv->pCtx[col].outputBytes;
      char   *pTarget = pQuery->sdata[col]->data;

      memcpy(pTarget + rowOffset * colBytes, pPage->data + pRuntimeEnv->offset[col] * pPage->num,
             colBytes * pPage->num);
    }

    rowOffset += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += totalRows;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2689
/*
 * Return the number of results held by a window result: the numOfRes of the
 * first output column that is not TS/TAG/TAGPRJ and has a positive count, or
 * 0 when there is no such column.
 *
 * TS, TAG and TAGPRJ are skipped because they do not decide how many rows a
 * query outputs; that is decided by the main output columns.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool decidesRowCount =
        !(funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);
    if (decidesRowCount) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }
  }

  return 0;
}

2712
/*
 * Merge the window results of all tables in one group, ordered by window
 * start timestamp, and flush the merged rows into the disk-based result
 * buffer.
 *
 * Tables that have neither buffered pages nor window results are ignored.
 * The remaining tables are merged through a loser tree: window results that
 * share a timestamp are merged into the same output row, a new timestamp
 * starts a new row. Whenever the output buffer reaches pQuery->rec.capacity
 * rows it is flushed with flushFromResultBuf().
 *
 * Returns
 *   0   no table in the group contributes any result,
 *   -1  flushing the merged rows failed,
 *   otherwise pQInfo->numOfGroupResultPages.
 * On allocation failure the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * All temporary allocations are released on every exit path, including the
 * flush-failure and out-of-memory paths.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  // collect the tables that actually have something to merge
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);

    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  int32_t ret = -1;
  int64_t lastTimestamp = -1;
  int64_t startt = 0;
  int64_t endt = 0;

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  char *       buf = calloc(1, pRuntimeEnv->interBufSize);

  // a zero-sized request may legitimately yield NULL, so only treat buf == NULL
  // as a failure when a non-empty buffer was requested
  if (pTree == NULL || pResultInfo == NULL || (buf == NULL && pRuntimeEnv->interBufSize > 0)) {
    tfree(pTree);
    tfree(pTableList);
    tfree(posList);
    tfree(pResultInfo);
    tfree(buf);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  startt = taosGetTimestampMs();

  while (1) {
    // the root of the loser tree names the source with the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // start a new output row, flushing first when the buffer is full
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // advance this source; an empty window result is simply skipped
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto cleanup;
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  pQInfo->offset = 0;
  ret = pQInfo->numOfGroupResultPages;

cleanup:
  tfree(pTableList);
  tfree(posList);
  tfree(pTree);
  tfree(pResultInfo);
  tfree(buf);

  return ret;
}

/*
 * Move the rows accumulated in pQuery->sdata into pages of the disk-based
 * result buffer.
 *
 * The rows are split into chunks of at most `capacity` rows, where capacity
 * is the number of rows of pQuery->rowSize bytes that fit into one internal
 * page after its tFilePage header. Each chunk gets a new page registered
 * under the id getGroupResultId(groupIndex) + numOfGroupResultPages; inside a
 * page, column i starts at pRuntimeEnv->offset[i] * (rows in the page).
 *
 * numOfGroupResultPages is incremented once per call. Always returns
 * TSDB_CODE_SUCCESS.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv    *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery              *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;
  int32_t pageId = -1;  // filled in by getNewDataBuf()

  int32_t pending = pQuery->sdata[0]->num;
  int32_t copied = 0;

  while (pending > 0) {
    int32_t chunk = (pending > capacity) ? capacity : pending;

    int32_t    groupId = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *pPage = getNewDataBuf(pResultBuf, groupId, &pageId);

    // pagewise copy to dest buffer, one column at a time
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t colBytes = pRuntimeEnv->pCtx[col].outputBytes;
      pPage->num = chunk;

      char *pSrc = ((char *)pQuery->sdata[col]->data) + copied * colBytes;
      memcpy(pPage->data + pRuntimeEnv->offset[col] * pPage->num, pSrc, pPage->num * colBytes);
    }

    copied += chunk;
    pending -= chunk;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output: empty every output page and point each function
 * context at its own SResultInfo slot.
 *
 * aOutputBuf is deliberately placed one element BEFORE the start of the page
 * data, because doMerge() advances it by outputBytes before writing the
 * first row.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCurCtx = &pCtx[col];

    pCurCtx->aOutputBuf = pQuery->sdata[col]->data - pCurCtx->outputBytes;
    pCurCtx->size = 1;
    pCurCtx->startOffset = 0;
    pCurCtx->resultInfo = &pResultInfo[col];

    pQuery->sdata[col]->num = 0;
  }
}

2905 2906 2907 2908 2909 2910 2911
/*
 * Prepare one table's query info for the scan in the opposite direction.
 * A NULL pTableQueryInfo is ignored.
 *
 * The new window starts where the previous scan stopped (lastKey + step) and
 * ends at the previous window start; lastKey restarts at the new window
 * start. The cursor order is flipped, its vgroupIndex is reset to -1, and
 * the window-result cursor is moved to the last window.
 *
 * `step` is derived from pQuery->order.order, which the caller has already
 * switched.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has change already!
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate that the previous win.ekey is consistent with lastKey + step
  // before it is overwritten (the check is currently disabled)

  TSKEY prevStart = pTableQueryInfo->win.skey;

  pTableQueryInfo->win.skey = pTableQueryInfo->lastKey + step;
  pTableQueryInfo->win.ekey = prevStart;
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * For every CLOSED window result, decide which output functions still take
 * part in the reverse scan by setting the `complete` flag of their result
 * info:
 *   - FIRST/FIRST_DST with ASC order and LAST/LAST_DST with DESC order are
 *     re-opened (complete = false);
 *   - TS and TAG are left untouched;
 *   - everything else is marked complete.
 * Windows that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);

      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

        bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
        bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pWinRes->resultInfo[col].complete = false;
        } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
          pWinRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

2957 2958
/*
 * Get the query ready for the reverse scan.
 *
 * Step 1 selects the functions that still run: for group-by-normal-column or
 * interval queries this is done per closed window by
 * disableFuncInReverseScanImpl(); otherwise directly on the result info of
 * each function context (contexts without a result info are skipped).
 * FIRST/FIRST_DST under ASC and LAST/LAST_DST under DESC are re-opened, TS
 * and TAG are left as they are, all others are marked complete.
 *
 * Step 2 updates the scan window of every table in every group through
 * updateTableQueryInfoForReverseScan().
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {
    // simple table query: one result info per function context
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
      if (pCtx->resultInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pCtx->resultInfo->complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pCtx->resultInfo->complete = true;
      }
    }
  }

  int32_t totalGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < totalGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, t);
      updateTableQueryInfoForReverseScan(pQuery, pTableInfo);
    }
  }
}

2997
/* Apply SWITCH_ORDER to the `order` field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SWITCH_ORDER(pCtx->order);
  }
}

H
Haojun Liao 已提交
3004
/*
 * Allocate the per-column SResultInfo array of a window result.
 *
 * A single zeroed allocation holds numOfOutput SResultInfo entries followed
 * by `interBufSize` bytes; the trailing area is handed to
 * setWindowResultInfo() as the intermediate result buffer. The page position
 * of the row is reset to {-1, -1}.
 *
 * Returns TSDB_CODE_QRY_OUT_OF_MEMORY when the allocation fails, otherwise
 * TSDB_CODE_SUCCESS.
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer lives right behind the SResultInfo array
  char *interBuf = (char *)pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every function context back at the start of its output column,
 * reset and re-attach its result info, clear the output column, and finally
 * re-initialise the functions through initCtxOutputBuf().
 *
 * TOP, BOTTOM and DIFF additionally get their timestamp output buffer set to
 * the output buffer of the first context.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo    *pResInfo = &pRuntimeEnv->resultInfo[col];

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    // not every function needs the intermediate result (e.g. COUNT/TAGPRJ/PRJ/TAG),
    // but the result info is reset and attached for all of them
    resetResultInfo(pResInfo);
    pCtx->resultInfo = pResInfo;

    // set the timestamp output buffer for top/bottom/diff query
    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of the function contexts by `output` rows and
 * reset their result info.
 *
 * Only functions whose status satisfies IS_OUTER_FORWARD have aOutputBuf
 * moved. TOP/BOTTOM write the timestamp column themselves through
 * ptsOutputBuf, so that pointer is advanced as well. DIFF must not appear
 * here (asserted); it is handled in the multi-output function.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    assert(funcId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[funcId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    bool writesOwnTimestamp = (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM);
    if (writesOwnTimestamp) {
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and run the function's
 * init() callback for the contexts whose result info is not yet initialised.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[funcId].init(pCtx);
    }
  }
}

3092
/*
 * Apply the remaining OFFSET of the query to the rows currently held in the
 * output buffers. Nothing happens when there are no rows or no offset.
 *
 * - rows <= offset: all rows are dropped, the offset shrinks by that amount,
 *   the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 * - rows >  offset: the first `offset` rows of every column are removed by
 *   shifting the rest to the front, the output pointers are placed behind
 *   the remaining rows, the offset becomes 0 and the result count is
 *   updated.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool nothingToSkip = (pQuery->rec.rows == 0 || pQuery->limit.offset == 0);
  if (nothingToSkip) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
           pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;
    int32_t         colBytes = pCtx->outputBytes;

    // regions overlap, hence memmove
    memmove(pQuery->sdata[col]->data, (char*) pQuery->sdata[col]->data + colBytes * numOfSkip, pQuery->rec.rows * colBytes);
    pCtx->aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * colBytes;

    switch (funcId) {
      case TSDB_FUNC_DIFF:
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }
  }

  updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
}

/*
 * Update the status word of the query.
 *
 * QUERY_NOT_COMPLETED overwrites the whole status word. Any other status is
 * OR-ed into the existing value after CLEAR_QUERY_STATUS has been applied for
 * QUERY_NOT_COMPLETED.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, remove it before merging the new one
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Call xNextStep for every output function except TSDB_FUNC_TS, using the
 * function contexts as they are currently bound.
 * Returns true if at least one of the visited result infos is not complete.
 */
static bool invokeNextStepOnAllCtx(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasIncomplete = false;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int16_t functId = pQuery->pSelectExpr[j].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]);

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (!pResInfo->complete) {
      hasIncomplete = true;
    }
  }

  return hasIncomplete;
}

/*
 * Move every output function to its next step and report whether the data
 * blocks have to be scanned once more.
 *
 * For group-by-normal-column and interval queries the step is applied to each
 * closed window result; otherwise it is applied once to the current contexts.
 *
 * @return true if any function result is not complete after the step
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    return invokeNextStepOnAllCtx(pRuntimeEnv);
  }

  bool            scanAgain = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, i);
    if (!pResult->status.closed) {
      continue;
    }

    // bind the function contexts to this window before stepping them
    setWindowResOutputBuf(pRuntimeEnv, pResult);

    // the helper is always invoked; only its result is accumulated
    if (invokeNextStepOnAllCtx(pRuntimeEnv)) {
      scanAgain = true;
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3188
/*
 * Capture the state needed to restore the query after repeated/reverse scans:
 * the query status, the current window index, the query window and a scan
 * window that begins at `start` and ends at the end key of the current table.
 *
 * @param start key the scan starts from; asserted not to be beyond the
 *              current table's lastKey with respect to the scan direction
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  assert((start <= pCurrent->lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
         (start >= pCurrent->lastKey && !QUERY_IS_ASC_QUERY(pQuery)));

  STimeWindow scanWindow = {.skey = start, .ekey = pCurrent->win.ekey};

  // members that are not listed here are zero-initialized
  SQueryStatusInfo snapshot = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
      .w           = pQuery->window,
      .curWindow   = scanWindow,
  };

  return snapshot;
}

3206 3207 3208 3209
/*
 * Switch the runtime environment to the opposite scan direction.
 *
 * The ts-buffer cursor is saved into pStatus, the query window is set to
 * pStatus->curWindow with start and end exchanged, the order is switched, and
 * a secondary query handle is created for the new window.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // save the cursor so that it can be restored once the reverse scan is done
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  // the condition must be built after the window and the order have been switched
  STsdbQueryCond cond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // release the handle left over from a previous scan, if any
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  // NOTE(review): the handle returned by tsdbQueryTables is not checked here -- confirm it cannot be NULL
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3243 3244
/*
 * Undo setEnvBeforeReverseScan(): switch the order back, restore the saved
 * ts-buffer cursor, mark the scan as a master scan and restore the status,
 * last key and query window from pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // put back the values recorded before the reverse scan, so that the scan scope of
  // a sliding query is limited again by the original window
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

3266
/*
 * Scan the data blocks of the current table.
 *
 * The blocks are scanned once as master scan and then again (REPEAT_SCAN) for
 * as long as needScanDataBlocksAgain() returns true. Afterwards, if
 * needReverseScan() says so, one more scan is executed in reverse direction.
 * If the query is found to be killed between two scans, the results are
 * finalized and control leaves through longjmp.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       key the scan starts from
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // only the master scan defines the range that later scans have to cover
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;
      qstatus.curWindow.ekey = pCurrent->lastKey - step;
      qstatus.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and leave the loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    STsdbQueryCond cond = {
        .twindow   = qstatus.curWindow,
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
           cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv);  // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3337
/*
 * Invoke xFinalize of every output function.
 *
 * For plain queries this is done once on the current contexts. For
 * group-by-normal-column and interval queries it is done for every closed
 * window result, and the number of rows of that window is recorded afterwards.
 * For group-by-normal-column queries all windows are closed first.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  // for each group result, call the finalize function for each column
  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    SWindowResult *pWindowRes = &pWindowResInfo->pResult[i];
    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1
     * except the top and bottom query
     */
    pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Check whether the select list contains at least one function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;

    bool isAuxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    if (!isAuxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3385
/*
 * Initialize a STableQueryInfo in the caller supplied memory.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param pTable      table handle stored in the info
 * @param win         query time window of the table; win.skey also becomes the initial lastKey
 * @param buf         memory in which the STableQueryInfo is set up
 * @return buf as STableQueryInfo, or NULL if the window result info cannot be initialized
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;

  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  // in other aggregate query, do not initialize the windowResInfo
  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol)) {
    return pInfo;
  }

  // set more initial size of interval/groupby query
  int32_t initialSize = 16;
  int32_t initialThreshold = 100;

  int32_t code =
      initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  return (code == TSDB_CODE_SUCCESS) ? pInfo : NULL;
}

H
Haojun Liao 已提交
3410
/*
 * Clean up the window result info held by pTableQueryInfo.
 * A NULL argument is ignored.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

H
Haojun Liao 已提交
3418 3419 3420 3421
/*
 * Assert that lastKey of the table has not moved behind win.skey:
 * lastKey >= win.skey for ascending queries, lastKey <= win.skey otherwise.
 */
#define CHECK_QUERY_TIME_RANGE(_q, _tableInfo)                                         \
  do {                                                                                 \
    assert(QUERY_IS_ASC_QUERY(_q) ? ((_tableInfo)->lastKey >= (_tableInfo)->win.skey)  \
                                  : ((_tableInfo)->lastKey <= (_tableInfo)->win.skey)); \
  } while (0)
3423 3424 3425 3426

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3427
 * @param pDataBlockInfo
3428
 */
H
Haojun Liao 已提交
3429
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3430
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3431 3432 3433
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3434 3435
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3436 3437 3438 3439

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3440

H
Haojun Liao 已提交
3441 3442 3443
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3444

H
Haojun Liao 已提交
3445
  int32_t GROUPRESULTID = 1;
3446 3447
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3448 3449 3450
  if (pWindowRes == NULL) {
    return;
  }
3451

3452 3453 3454 3455 3456 3457 3458 3459 3460 3461
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3462

H
Haojun Liao 已提交
3463 3464
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3465 3466 3467 3468
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3469
/*
 * Point every function context at the output position and the result info
 * that belong to the given window result.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult);

    // these functions take their timestamp output position from the first context
    bool needTsOutput =
        (functionId == TSDB_FUNC_TOP) || (functionId == TSDB_FUNC_BOTTOM) || (functionId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[i];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3494 3495
/*
 * Bind the function contexts to the given window result and initialize those
 * whose result info has not been initialized yet. Contexts whose result info
 * is both initialized and complete only get their resultInfo pointer updated.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    pCtx->resultInfo = &pResult->resultInfo[i];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult);
    pCtx->currentStage = 0;

    int32_t functionId = pCtx->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // set super table query flag
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

    if (pCtx->resultInfo->initialized) {
      continue;
    }

    aAggs[functionId].init(pCtx);
  }
}

3526
/*
 * Set the tag values for the given table and, when a ts buffer is in use,
 * position the ts buffer for this table.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the start position is
 * looked up by the table's tag and the resulting cursor is remembered in
 * pTableQueryInfo; on later visits the remembered cursor is restored.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of current table
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3557
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3558 3559
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3560 3561
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3562 3563 3564
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3565
    pTableQueryInfo->win.skey = key;
3566
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3567

3568 3569 3570 3571 3572
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3573

3574 3575 3576 3577 3578 3579
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3580
    STimeWindow     w = TSWINDOW_INITIALIZER;
3581
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3582

H
Haojun Liao 已提交
3583 3584
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3585
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3586
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3587

3588 3589
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3590
        assert(win.ekey == pQuery->window.ekey);
3591
      }
3592 3593
      
      pWindowResInfo->prevSKey = w.skey;
3594
    }
3595

3596
    pTableQueryInfo->queryRangeSet = 1;
3597
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3598 3599 3600 3601
  }
}

/*
 * Check whether any function in the select list has the
 * TSDB_FUNCSTATE_NEED_TS bit set in its nStatus.
 */
bool requireTimestamp(SQuery *pQuery) {
  bool required = false;

  for (int32_t i = 0; i < pQuery->numOfOutput && !required; i++) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    required = ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
  }

  return required;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * 1. if lastKey of the current table or the end key of the query window locates in this block,
 *    the timestamp column is needed to decide the precise position
 * 2. if any function requires the timestamp (see requireTimestamp), it is needed in any case
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3625
/*
 * Copy rows of the closed window results into the output buffers of the query.
 *
 * Copying resumes at the position recorded in pQInfo (groupIndex and offset)
 * and stops when all closed windows are consumed or the output capacity is
 * reached; the position in pQInfo is advanced accordingly.
 *
 * @param orderType TSDB_ORDER_ASC walks the windows upwards, anything else walks them downwards
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  int32_t first;
  int32_t step;
  if (orderType == TSDB_ORDER_ASC) {
    first = pQInfo->groupIndex;
    step = 1;
  } else {  // desc order copy all data
    first = numOfClosed - pQInfo->groupIndex - 1;
    step = -1;
  }

  int32_t numOfCopied = 0;

  for (int32_t i = first; (i < numOfClosed) && (i >= 0); i += step) {
    SWindowResult *pWindowRes = &pWindows[i];

    // nothing to copy from this window, move on to the next one
    if (pWindowRes->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t numOfRowsToCopy = pWindowRes->numOfRows - pQInfo->offset;
    int32_t srcOffset = pQInfo->offset;

    /*
     * current output space is not enough to keep all the result data of this group, only copy partial results
     * to SQuery object's result buffer
     */
    if (numOfRowsToCopy > pQuery->rec.capacity - numOfCopied) {
      numOfRowsToCopy = pQuery->rec.capacity - numOfCopied;
      pQInfo->offset += numOfRowsToCopy;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t size = pRuntimeEnv->pCtx[j].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, j, pWindowRes);
      char *dst = pQuery->sdata[j]->data + numOfCopied * size;
      memcpy(dst, src + srcOffset * size, size * numOfRowsToCopy);
    }

    numOfCopied += numOfRowsToCopy;
    if (numOfCopied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, numOfCopied);
#endif
  return numOfCopied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
3700
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
3701
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
3702

3703
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
3704
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
3705

3706
  pQuery->rec.rows += numOfResult;
3707

3708
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
3709 3710
}

H
Haojun Liao 已提交
3711
/*
 * For queries that are not interval queries, raise numOfRows of every window
 * result to the largest numOfRes among its function results. The results of
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ are not taken into account.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // update the number of result for each, only update the number of rows for the corresponding window result.
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pWindowRes = &pWindowResInfo->pResult[i];

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functionId = pRuntimeEnv->pCtx[j].functionId;

      bool countsRows =
          (functionId != TSDB_FUNC_TS) && (functionId != TSDB_FUNC_TAG) && (functionId != TSDB_FUNC_TAGPRJ);
      if (countsRows) {
        pWindowRes->numOfRows = MAX(pWindowRes->numOfRows, pWindowRes->resultInfo[j].numOfRes);
      }
    }
  }
}

H
Haojun Liao 已提交
3733
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
3734
    SArray *pDataBlock, __block_search_fn_t searchFn) {
3735
  SQuery *         pQuery = pRuntimeEnv->pQuery;
3736 3737
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  
3738
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3739
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3740

H
Haojun Liao 已提交
3741
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
3742
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3743
  } else {
3744
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3745 3746 3747
  }
}

3748 3749 3750
/*
 * Check whether there are results that still have to be returned to the client.
 *
 * @return false once the LIMIT is reached; otherwise true if the fill
 *         procedure or the window results still hold rows to return
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  if (!(pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery))) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // There are results not returned to client yet, so filling operation applied to the remain result is required
  // in the first place.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * While the code reaches here, there are no results remains now.
   * If query is not completed yet, the gaps between two results blocks need to be handled after next data block
   * is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
   * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
   * first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response buffer.
 *
 * Layout written to `data`:
 *   1. the output columns, one after another, each bytes * numOfRows long
 *   2. the number of table id entries as a 32-bit integer converted with htonl
 *   3. one STableIdInfo per entry, with uid/tid/key converted by htobe64/htonl/htobe64
 *
 * Afterwards, if the query is completed, its status is moved to QUERY_OVER
 * when nothing remains to be returned.
 *
 * @param pQInfo    query handle
 * @param numOfRows number of result rows to copy per column
 * @param data      destination buffer; must be large enough for the layout above
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  /*
   * After the column payload `data` sits at an arbitrary byte offset, so it must not be written
   * through an int32_t* or STableIdInfo* cast (misaligned store). Build the values in properly
   * typed locals and copy their bytes instead.
   */
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(int32_t));
  data += sizeof(int32_t);

  for (int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst = *pSrc;
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
3826
/*
 * Generate filled result rows and apply the remaining OFFSET to them.
 *
 * Blocks of filled rows are generated until either a block survives the
 * offset (fully or partially) or no more results remain.
 *
 * NOTE(review): the rows are generated into pQuery->sdata while the offset shift is applied to pDst,
 * and numOfFilled is never written -- presumably callers pass pQuery->sdata as pDst; verify.
 *
 * @return number of rows that are available in the output after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      // part of this block survives the offset: drop the leading rows and return the rest
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // the whole block is consumed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
           pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;
    ret = 0;

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return ret;
    }
  }
}

3871
/*
 * Write the cost summary of the query (elapsed time, block and row counters)
 * to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, total blocks:%d, load block statis:%d,"
         " load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo,
         pCost->elapsedTime,
         pCost->totalBlocks,
         pCost->loadBlockStatis,
         pCost->loadBlocks,
         pCost->totalRows,
         pCost->totalCheckedRows);
}

3881 3882
/*
 * Consume the pending row offset (pQuery->limit.offset) inside the current data block.
 *
 * If the offset equals the number of rows in the block, the whole block is dropped:
 * lastKey is moved one step past the block boundary and the offset is cleared.
 * Otherwise pQuery->pos is set to the first row that survives the offset, lastKey is
 * taken from column 0 of the loaded block (read as a TSKEY array) at that position,
 * the offset is cleared and the query functions are applied to the block.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param pBlockInfo   description of the block the query handle currently points at
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  // the offset swallows this block completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    TSKEY   boundary = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pTableInfo->lastKey = boundary + step;
    pQuery->limit.offset = 0;
    return;
  }

  // position of the first row kept, counted from the end the scan starts at
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->limit.offset
                                           : (pBlockInfo->rows - pQuery->limit.offset - 1);

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pBlockCols = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pFirstCol = taosArrayGet(pBlockCols, 0);

  // resume from the key stored at the start position; the offset is now fully consumed
  pTableInfo->lastKey = ((TSKEY *)pFirstCol->pData)[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlockCols);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3916

3917 3918 3919 3920 3921
/*
 * Skip whole data blocks while the pending row offset is larger than the block size.
 *
 * Does nothing when there is no positive offset or when the query has filter columns.
 * For each block that is skipped entirely the offset is reduced by the block's row
 * count and lastKey is moved one step past the block. The first block that is not
 * larger-than-covered by the offset is handed to updateOffsetVal() and the loop ends.
 * If the query is killed, the result is finalized and control leaves via longjmp().
 *
 * @param pRuntimeEnv  runtime environment of the running query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->numOfFilterCols > 0 || pQuery->limit.offset <= 0) {
    return;
  }

  pQuery->pos = 0;

  const int32_t    step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pTableInfo = pQuery->current;
  TsdbQueryHandleT pHandle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pHandle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv);  // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // the start position lies inside (or exactly at the end of) this block
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is covered by the offset: drop it and move on
    pQuery->limit.offset -= blockInfo.rows;
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey) + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }
}
3952

H
Haojun Liao 已提交
3953
/*
 * Apply the pending offset to an interval query by skipping time windows.
 *
 * The offset is counted in windows, not rows: each window that lies inside the
 * current block decrements pQuery->limit.offset by one. Because holes may exist in
 * the data, the start cannot be computed as interval * offset; the blocks have to
 * be walked instead.
 *
 * Nothing is skipped when there is no positive offset, when filter columns exist,
 * or when a ts buffer or fill info is attached to the runtime environment.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param start        out: initialised to the current table's lastKey; overwritten
 *                     with the new query start once the offset has been consumed
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL ||
      pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow aligned = TSWINDOW_INITIALIZER;

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableInfo = pQuery->current;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending scan: align only once, on the first block seen
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);
      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      bool winInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (win.ekey <= blockInfo.window.ekey)
                                                   : (win.ekey >= blockInfo.window.skey);
      if (winInBlock) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = win.skey;
      }

      STimeWindow next = win;
      GET_NEXT_TIMEWINDOW(pQuery, &next);

      bool offsetDone = (pQuery->limit.offset == 0);
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (next.skey <= blockInfo.window.ekey)
                                                    : (next.ekey >= blockInfo.window.skey);

      if (!nextInBlock) {
        if (offsetDone) {
          // offset consumed and the next window is outside this block: start from that window
          *start = next.skey;
          pQuery->window.skey = next.skey;
          pWinInfo->prevSKey = next.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still touches the current data block: load the block and
       * find the start position of the next queried time window in column 0.
       * TODO: Optimize for this case. Not all data blocks need to be loaded, only the one
       * in which the first actually required time window resides.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

      next = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (offsetDone) {
        // reset the query start timestamp
        pTableInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
        pQuery->window.skey = pTableInfo->win.skey;
        *start = pTableInfo->win.skey;

        pWinInfo->prevSKey = next.skey;
        int32_t index = pRuntimeEnv->windowResInfo.curIndex;

        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      // more windows to skip: continue from the window that was just located
      pTableInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
      pWinInfo->prevSKey = next.skey;
      win = next;
    }
  }

  return true;
}

B
Bomin Zhang 已提交
4074 4075
/*
 * Create the tsdb query handle for a query and store it in runtimeEnv.pQueryHandle.
 *
 * No handle is created for tag-only queries, nor for super table queries that are
 * neither interval queries nor fixed-output queries.
 *
 * @param tsdb           storage handle passed through to the tsdbQuery* functions
 * @param pQInfo         query whose runtime environment receives the handle
 * @param isSTableQuery  true when the query targets a super table
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return;
  }

  if (isSTableQuery && !QUERY_IS_INTERVAL_QUERY(pQuery) && !isFixedOutputQuery(pRuntimeEnv)) {
    return;
  }

  STsdbQueryCond cond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // a plain ascending scan over exactly one table uses that table's own time window
  bool useTableWindow = !isSTableQuery && (pQInfo->tableqinfoGroupInfo.numOfTables == 1) &&
                        (cond.order == TSDB_ORDER_ASC) && !QUERY_IS_INTERVAL_QUERY(pQuery) &&
                        !isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isFixedOutputQuery(pRuntimeEnv);
  if (useTableWindow) {
    SArray          *firstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pTableInfo = taosArrayGetP(firstGroup, 0);
    cond.twindow = pTableInfo->win;
  }

  TsdbQueryHandleT handle;
  if (isFirstLastRowQuery(pQuery)) {
    handle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else if (isPointInterpoQuery(pQuery)) {
    handle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    handle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }

  pRuntimeEnv->pQueryHandle = handle;
}

4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126
/*
 * Build the per-output-column description array used to initialise fill info.
 *
 * One SFillColInfo is produced for each of the pQuery->numOfOutput select
 * expressions: bytes, type and function id are copied from the expression, the fill
 * value is taken from pQuery->fillVal[i], and col.offset is the running sum of the
 * byte widths of all preceding columns.
 *
 * @param pQuery  query supplying numOfOutput, pSelectExpr[] and fillVal[]
 * @return calloc()-allocated array of numOfOutput entries, or NULL if the allocation
 *         fails (calloc may also return NULL when numOfOutput is 0).
 *         NOTE(review): the only visible caller passes the array to taosInitFillInfo();
 *         confirm who is responsible for freeing it.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    // previously the loop below dereferenced the NULL pointer on allocation failure
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4135
/*
 * Initialise the runtime state of a query before execution starts.
 *
 * Steps, in order: copy precision and query-kind flags into the runtime environment,
 * decide the scan order, create the tsdb query handle, attach the ts buffer, build the
 * runtime environment, create the disk based result buffer and window result info
 * where the query kind needs them, create the fill info for fill queries, and finally
 * mark the query as not completed.
 *
 * @param pQInfo         query to initialise
 * @param pTsBuf         optional ts buffer (may be NULL); stored in runtimeEnv.pTSBuf
 * @param tsdb           storage handle; stored in pQInfo->tsdb
 * @param vgId           vnode group id; stored in pQInfo->vgId
 * @param isSTableQuery  true when the query targets a super table
 * @return TSDB_CODE_SUCCESS, or the first error code returned by setupQueryRuntimeEnv(),
 *         createDiskbasedResultBuffer() or initWindowResInfo()
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  int32_t           code = TSDB_CODE_SUCCESS;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forwards when its stored order matches the query order
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    int32_t numOfPages = getInitialPageNum(pQInfo);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // results are keyed by the group-by column when grouping by a normal column, else by group id
      int16_t keyType = TSDB_DATA_TYPE_NULL;
      keyType = pRuntimeEnv->groupbyNormalCol ? getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr)
                                              : TSDB_DATA_TYPE_INT;

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 32, 4096, keyType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfPages = getInitialPageNum(pQInfo);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // keyed by the group-by column, or by timestamp for interval queries
    int16_t keyType = TSDB_DATA_TYPE_NULL;
    keyType = pRuntimeEnv->groupbyNormalCol ? getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr)
                                            : TSDB_DATA_TYPE_TIMESTAMP;

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfPages, 4096, keyType);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo *pFillCols = taosCreateFillColInfo(pQuery);
    STimeWindow   aligned = TSWINDOW_INITIALIZER;

    TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &aligned);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, aligned.skey, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision,
                                              pQuery->fillType, pFillCols);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4232
/*
 * Clear the "complete" flag of every output column's result info so the functions
 * can run again on the next table.
 *
 * @param pRuntimeEnv  runtime environment holding the function contexts (pCtx)
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  const int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259
/*
 * Prepare the execution environment for one data block of one table.
 *
 * Interval queries set the interval query range from the block's start key and, when
 * tag results or a ts buffer are in use, the additional per-table info. All other
 * queries set the execution context for the table's group, passing the key one step
 * past the block's end key.
 *
 * @param pQInfo           query being executed
 * @param pTableQueryInfo  per-table query state of the table owning the block
 * @param pBlockInfo       description of the current data block
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
    return;
  }

  // interval query
  setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
}

H
Haojun Liao 已提交
4260
/*
 * Scan every data block delivered by the active query handle and apply the query
 * functions to it, switching the current table per block.
 *
 * The primary handle is used during the master scan, the secondary handle otherwise.
 * The scan stops early when a block belongs to a table id that is not present in the
 * table hash map, and leaves via longjmp() when the query has been killed.
 *
 * @param pQInfo  query being executed
 * @return difference between the taosGetTimestampMs() readings taken after and
 *         before the scan
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  const int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  const int32_t  step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // look up the per-table query state by the table id of the block
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    pQuery->current = *ppTableInfo;
    CHECK_QUERY_TIME_RANGE(pQuery, *ppTableInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *ppTableInfo, &blockInfo);
    }

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    if (loadDataBlockOnDemand(pRuntimeEnv, pHandle, &blockInfo, &pStatis, &pDataBlock) == BLK_DATA_DISCARD) {
      // block discarded: continue from one step past its boundary
      TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = boundary + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4312 4313
/*
 * Prepare the runtime environment for querying a single table of the first table group.
 *
 * Marks the query as not completed, sets tag values when needed, replaces the current
 * query handle with one that covers only the selected table in the range
 * [lastKey, win.ekey], positions the ts buffer (if any) and initialises the output
 * buffers of the function contexts.
 *
 * @param pQInfo  query being executed
 * @param index   position of the table inside table group 0
 * @return false when a ts buffer is attached, no cursor has been saved yet
 *         (cur.vgroupIndex == -1) and the buffer holds no element for the table's tag;
 *         true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *tables = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(tables, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId* id = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor: temporary group list wrapping just this one table
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *oneTable = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(oneTable, &pTableInfo->pTable);
  taosArrayPush(groupList, &oneTable);
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = groupList};

  // drop the handle of the previous table before opening the new one
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
  taosArrayDestroy(oneTable);
  taosArrayDestroy(groupList);

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4378
static void sequentialTableProcess(SQInfo *pQInfo) {
4379
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4380
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4381
  setQueryStatus(pQuery, QUERY_COMPLETED);
4382

H
Haojun Liao 已提交
4383
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4384

H
Haojun Liao 已提交
4385
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4386 4387
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4388

4389
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4390
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4391

4392
      qDebug("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4393
             numOfGroups, group);
H
Haojun Liao 已提交
4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4414
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4415
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4416
      } else {
H
Haojun Liao 已提交
4417
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4418
      }
H
Haojun Liao 已提交
4419 4420
      
      initCtxOutputBuf(pRuntimeEnv);
4421
      
4422
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4423 4424
      assert(taosArrayGetSize(s) >= 1);
      
4425
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4426 4427 4428
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4429

dengyihao's avatar
dengyihao 已提交
4430
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4431

H
Haojun Liao 已提交
4432
      // here we simply set the first table as current table
4433 4434 4435
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4436
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4449 4450 4451 4452 4453 4454

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4455
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4456
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4457
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4458

4459
      qDebug("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
B
Bomin Zhang 已提交
4481 4482
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
4483

4484
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4485 4486
      assert(taosArrayGetSize(s) >= 1);

4487
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4488 4489 4490 4491 4492 4493 4494 4495

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4496
      taosArrayDestroy(s); 
4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4511
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4512 4513 4514 4515 4516 4517 4518
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4519
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4520 4521 4522 4523 4524 4525

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4526 4527 4528
    }
  } else {
    /*
4529
     * 1. super table projection query, 2. ts-comp query
4530 4531 4532
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4533
    if (pQInfo->groupIndex > 0) {
4534
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4535
      pQuery->rec.total += pQuery->rec.rows;
4536

4537
      if (pQuery->rec.rows > 0) {
4538 4539 4540
        return;
      }
    }
4541

4542
    // all data have returned already
4543
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4544 4545
      return;
    }
4546

4547 4548
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4549

H
Haojun Liao 已提交
4550
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4551 4552
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4553

4554
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4555
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4556
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4557
      }
4558

4559
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4560
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4561
        pQInfo->tableIndex++;
4562 4563
        continue;
      }
4564

H
hjxilinx 已提交
4565
      // TODO handle the limit offset problem
4566
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4567
        //        skipBlocks(pRuntimeEnv);
4568 4569
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4570 4571 4572
          continue;
        }
      }
4573

4574
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4575
      skipResults(pRuntimeEnv);
4576

4577
      // the limitation of output result is reached, set the query completed
4578
      if (limitResults(pRuntimeEnv)) {
4579
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4580 4581
        break;
      }
4582

4583 4584
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4585

4586
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4587 4588 4589 4590 4591 4592
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4593
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4594

H
Haojun Liao 已提交
4595
        STableIdInfo tidInfo = {0};
4596

H
Haojun Liao 已提交
4597 4598 4599
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4600
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4601 4602
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4603
        // if the buffer is full or group by each table, we need to jump out of the loop
4604 4605
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4606 4607
          break;
        }
4608

4609
      } else {
4610
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4611 4612
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4613 4614
          continue;
        } else {
4615 4616 4617
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4618 4619 4620
        }
      }
    }
H
Haojun Liao 已提交
4621

4622
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4623 4624
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4625
  }
4626

4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4639
    finalizeQueryResult(pRuntimeEnv);
4640
  }
4641

4642 4643 4644
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4645

4646
  qDebug(
B
Bomin Zhang 已提交
4647
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4648
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4649
      pQuery->limit.offset);
4650 4651
}

4652 4653 4654 4655
/*
 * Switch the runtime environment over to the reverse scan.
 *
 * The scan flag is set to REVERSE_SCAN, the two boundaries of the query time window are swapped and
 * the traverse order is switched. A secondary tsdb query handle is then opened for the swapped
 * window/order (a handle still stored in pSecQueryHandle is cleaned up first), the query status is
 * reset to QUERY_NOT_COMPLETED and the output contexts are adjusted through switchCtxOrder() and
 * disableFuncInReverseScan().
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // keep the cursor of the timestamp buffer in line with the new traverse order
  if (pEnv->pTSBuf != NULL) {
    pEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // release the handle currently stored in pSecQueryHandle before it is overwritten below
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  // the condition is built from the already swapped window and the already switched order
  STsdbQueryCond cond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
      .twindow   = pQuery->window,
  };

  pEnv->prevGroupId = INT32_MIN;
  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4684 4685 4686 4687
/*
 * Bring the runtime environment back to the master scan after the reverse scan:
 * the time window boundaries are swapped again, the order of the timestamp buffer cursor
 * (if a timestamp buffer exists) and of the output contexts is switched, and the scan flag
 * is set to MASTER_SCAN.
 *
 * NOTE(review): pQuery->order.order is switched in doSaveContext() but is not switched back
 * here -- confirm that this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4698 4699 4700
/*
 * Close all time window results once a scan has finished.
 *
 * For an interval query the window results kept by every table of every table group are closed;
 * otherwise only the window results held by the runtime environment (the group results) are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // not an interval query: only the group results of the runtime environment are involved
  if (!(QUERY_IS_INTERVAL_QUERY(pQuery))) {
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTableList = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pTableList);

    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pTableList, t);
      closeAllTimeWindow(&pTableQueryInfo->windowResInfo);
    }
  }
}

/*
 * Abort the current execution with longjmp() if an error code has been recorded in pQInfo or
 * the query has been killed; finalizeQueryResult() is called first to clean up the resources
 * allocated during the query. Returns normally otherwise.
 */
static void abortMultiTableQueryIfFailed(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_SUCCESS && !(IS_QUERY_KILLED(pQInfo))) {
    return;
  }

  qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
  finalizeQueryResult(&pQInfo->runtimeEnv); // clean up allocated resource during query
  longjmp(pQInfo->runtimeEnv.env, TSDB_CODE_TSC_QUERY_CANCELLED);
}

/*
 * Execute a query over multiple tables: master scan, optional reverse scan, then copy the
 * results into the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is launched; the already generated results are only
 * copied into the output buffer. If an error code is set or the query is killed, the function
 * does not return but jumps to pRuntimeEnv->env with TSDB_CODE_TSC_QUERY_CANCELLED.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  // groupIndex > 0: the scan has been done by a previous invocation, only copy the remaining results
  if (pQInfo->groupIndex > 0) {
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // stop here if the query failed or was killed during the master scan
  abortMultiTableQueryIfFailed(pQInfo);

  // the time window results are closed after the master scan only
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (needReverseScan(pQuery)) {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  } else {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // the reverse scan may have failed or been killed as well
  abortMultiTableQueryIfFailed(pQInfo);

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery)) {
    // results are copied out only if they can be merged into the group results
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {
    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  }

  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4798
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4799
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4800 4801
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4802 4803 4804 4805
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4806 4807
  pQuery->current = pTableInfo;  // set current query table info
  
4808
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4809
  finalizeQueryResult(pRuntimeEnv);
4810

H
Haojun Liao 已提交
4811
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4812 4813
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4814
  }
4815

H
Haojun Liao 已提交
4816
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4817
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4818

4819
  skipResults(pRuntimeEnv);
4820
  limitResults(pRuntimeEnv);
4821 4822
}

H
hjxilinx 已提交
4823
/*
 * Process a single-table query that may produce multiple output rows per invocation.
 *
 * The table is scanned repeatedly until some rows are available after the offset has been
 * applied, or the query is completed. When the query is completed, the id of the table together
 * with its last key is appended to pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->current = pTableInfo;

  // the output buffer must not be re-initialized for a ts_comp query
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pEnv);
  }

  // without filter conditions the offset is consumed by skipping blocks, no data block is loaded
  skipBlocks(pEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQuery->current->lastKey);
    finalizeQueryResult(pEnv);

    pQuery->rec.rows = getNumOfResult(pEnv);

    // with filter conditions the offset can only be applied on the generated rows
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pEnv);
    }

    // stop as soon as there are rows to return or there is no more data to scan
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    // all generated rows were skipped: start the next round with a clean output buffer
    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // record the id of the table and the last key that has been reached
    STableId    *pTableId = TSDB_TABLEID(pQuery->current);
    STableIdInfo info;

    info.key = pQuery->current->lastKey;
    info.tid = pTableId->tid;
    info.uid = pTableId->uid;
    taosArrayPush(pQInfo->arrTableIdInfo, &info);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
4883
/*
 * Scan the data blocks of the current table, starting at the given key, until the query status
 * is QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * After every scan round, if an offset is pending, no fill is requested and either filter columns
 * or a timestamp buffer are present, up to 'offset' closed time windows are removed from the head
 * of the window results and the offset is reduced accordingly.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // todo handle offset, in case of top/bottom interval query
    bool applyOffset = (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQuery->limit.offset > 0 &&
                       pQuery->fillType == TSDB_FILL_NONE;
    if (applyOffset) {
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numOfSkipped = MIN(numOfClosed, pQuery->limit.offset);

      clearFirstNTimeWindow(pRuntimeEnv, numOfSkipped);
      pQuery->limit.offset -= numOfSkipped;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4910
// handle time interval query on table
H
hjxilinx 已提交
4911
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4912 4913
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4914 4915
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4916

H
Haojun Liao 已提交
4917
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
4918 4919
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4920
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4921
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4922
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4923 4924 4925 4926
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4927
  while (1) {
H
Haojun Liao 已提交
4928
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4929

H
Haojun Liao 已提交
4930
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4931
      pQInfo->groupIndex = 0;  // always start from 0
4932
      pQuery->rec.rows = 0;
4933
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4934

4935
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4936
    }
4937

4938
    // the offset is handled at prepare stage if no interpolation involved
4939
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4940
      limitResults(pRuntimeEnv);
4941 4942
      break;
    } else {
H
Haojun Liao 已提交
4943
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
4944
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
4945
      numOfFilled = 0;
4946
      
H
Haojun Liao 已提交
4947
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
4948
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4949
        limitResults(pRuntimeEnv);
4950 4951
        break;
      }
4952

4953
      // no result generated yet, continue retrieve data
4954
      pQuery->rec.rows = 0;
4955 4956
    }
  }
4957

4958
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
4959
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
4960
    pQInfo->groupIndex = 0;
4961
    pQuery->rec.rows = 0;
4962
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4963
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4964
  }
4965

H
Haojun Liao 已提交
4966
  pQInfo->pointsInterpo += numOfFilled;
4967 4968
}

4969 4970 4971 4972
/*
 * Entry point of the query on a single table (exactly one table is expected in
 * pQInfo->tableqinfoGroupInfo).
 *
 * Results remaining from a previous invocation are returned first: either through the fill
 * procedure, or by copying the pending window results. Only if nothing is returned this way, the
 * table is processed by the interval, the fixed-output or the multi-output processor, and the
 * elapsed time is added to the query summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (queryHasRemainResults(pEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * Some results have not been returned yet due to the result interpolation; stay in the
       * fill procedure instead of launching the retrieve procedure for the next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // the copy always starts from the first window result

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // no window result is left
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableQueryInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pEnv->groupbyNormalCol) {
    tableIntervalProcess(pQInfo, pTableQueryInfo);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTableQueryInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQueryInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5034
/*
 * Entry point of the query on a super table.
 *
 * Interval queries, and fixed-output queries that are neither point interpolation, group by
 * normal column nor first/last row queries, are executed by multiTableQueryProcess(); all other
 * queries are executed table by table through sequentialTableProcess(). The elapsed time is
 * added to the query summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  bool scanAllTables =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && !pEnv->groupbyNormalCol &&
       !isFirstLastRowQuery(pQuery));

  if (scanAllTables) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5057
/*
 * Locate the column referenced by an expression in the source column lists of the query message.
 *
 * @param pQueryMsg  query message; numOfTags/numOfCols and colList are read
 * @param pExprMsg   expression whose colInfo (flag and colId) identifies the column
 * @param pTagCols   tag column list, searched when the expression refers to a tag column
 * @return  the index in pTagCols (tag column) or in pQueryMsg->colList (normal column);
 *          -1 if the expression refers to the table name pseudo column;
 *          INT32_MAX if the column does not exist in the searched list. This is a logic error:
 *          it triggers assert(0) when assertions are enabled. The sentinel is not smaller than
 *          any column count, hence a check of the form "j < numOfCols || j < numOfTags" fails.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);

  // reached only if assertions are compiled out; never fall off the end of a non-void function
  return INT32_MAX;
}

5086 5087 5088
/*
 * Check whether the index returned by getColumnIndexInSource() for the expression is smaller
 * than the number of columns or the number of tags of the query message. The value -1, returned
 * for the table name pseudo column, passes this check.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5091
/*
 * Sanity check of the scalar fields of a query message.
 *
 * The checks are applied in the order below; the first violated one is logged and makes the
 * function return false:
 *   - intervalTime   must not be negative
 *   - numOfTables    must be positive
 *   - numOfGroupCols must not be negative
 *   - numOfOutput    must be within [1, TSDB_MAX_COLUMNS]
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Check the number of source columns (normal columns + tags) of a query message.
 *
 * Negative counts or a total above TSDB_MAX_COLUMNS are rejected (and logged). A message without
 * any source column is accepted only if every output expression is one of:
 *   - TSDB_FUNC_TAGPRJ
 *   - TSDB_FUNC_TID_TAG on the table name pseudo column
 *   - TSDB_FUNC_COUNT on the table name pseudo column
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  // at least one source column is available, nothing else to check
  if (numOfTotal != 0) {
    return true;
  }

  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = pExprMsg[i];

    bool noSourceColNeeded =
        (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFunc->functionId == TSDB_FUNC_TID_TAG && pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (pFunc->functionId == TSDB_FUNC_COUNT && pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);

    if (!noSourceColNeeded) {
      return false;
    }
  }

  return true;
}

5137
/*
 * Build the list of table ids from the query message.
 *
 * pMsg points to pQueryMsg->numOfTables consecutive STableIdInfo entries. The byte order of the
 * tid/uid/key fields of every entry is converted in place, and the entry is appended to a newly
 * created array that is returned through pTableIdList.
 *
 * @return the position in the message right behind the last table id entry
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pIdList = (STableIdInfo *)pMsg;
  int32_t       index = 0;

  while (index < pQueryMsg->numOfTables) {
    STableIdInfo *pId = &pIdList[index];

    pId->tid = htonl(pId->tid);
    pId->uid = htobe64(pId->uid);
    pId->key = htobe64(pId->key);

    taosArrayPush(*pTableIdList, pId);
    index += 1;
  }

  return (char *)(pIdList + index);
}
5155

5156
/**
H
hjxilinx 已提交
5157
 * pQueryMsg->head has been converted before this function is called.
5158
 *
H
hjxilinx 已提交
5159
 * @param pQueryMsg
5160 5161 5162 5163
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5164
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5165
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5166 5167
  int32_t code = TSDB_CODE_SUCCESS;

5168 5169 5170 5171 5172 5173 5174 5175
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5176

5177 5178
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5179
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5180
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5181 5182

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5183
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5184
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5185 5186 5187
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5188
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5189
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5190
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5191

5192
  // query msg safety check
5193
  if (!validateQueryMsg(pQueryMsg)) {
5194 5195
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5196 5197
  }

H
hjxilinx 已提交
5198 5199
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5200 5201
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5202
    pColInfo->colId = htons(pColInfo->colId);
5203
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5204 5205
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5206

H
hjxilinx 已提交
5207
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5208

H
hjxilinx 已提交
5209
    int32_t numOfFilters = pColInfo->numOfFilters;
5210
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5211
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5212 5213 5214
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5215 5216 5217 5218
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5219 5220 5221

      pMsg += sizeof(SColumnFilterInfo);

5222 5223
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5224

5225
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5226 5227
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5228
      } else {
5229 5230
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5231 5232
      }

5233 5234
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5235 5236 5237
    }
  }

5238 5239
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5240

5241
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5242
    (*pExpr)[i] = pExprMsg;
5243

5244
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5245 5246 5247 5248
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5249

5250
    pMsg += sizeof(SSqlFuncMsg);
5251 5252

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5253
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5254 5255 5256 5257
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5258
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5259 5260 5261 5262 5263
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5264 5265
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5266
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5267 5268
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5269 5270
      }
    } else {
5271
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5272
//        return TSDB_CODE_QRY_INVALID_MSG;
5273
//      }
5274 5275
    }

5276
    pExprMsg = (SSqlFuncMsg *)pMsg;
5277
  }
5278

5279
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5280
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5281
    goto _cleanup;
5282
  }
5283

H
hjxilinx 已提交
5284
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5285

H
hjxilinx 已提交
5286
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5287
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5288 5289 5290 5291
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5292 5293 5294

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5295
      pMsg += sizeof((*groupbyCols)[i].colId);
5296 5297

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5298 5299
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5300
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5301 5302 5303 5304 5305
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5306

H
hjxilinx 已提交
5307 5308
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5309 5310
  }

5311 5312
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5313
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5314 5315

    int64_t *v = (int64_t *)pMsg;
5316
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5317 5318
      v[i] = htobe64(v[i]);
    }
5319

5320
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5321
  }
5322

5323 5324 5325 5326
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5327

5328 5329 5330 5331
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5332

5333
      (*tagCols)[i] = *pTagCol;
5334
      pMsg += sizeof(SColumnInfo);
5335
    }
H
hjxilinx 已提交
5336
  }
5337

5338 5339 5340 5341 5342 5343
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5344

weixin_48148422's avatar
weixin_48148422 已提交
5345
  if (*pMsg != 0) {
5346
    size_t len = strlen(pMsg) + 1;
5347

5348
    *tbnameCond = malloc(len);
5349 5350 5351 5352 5353
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5354
    strcpy(*tbnameCond, pMsg);
5355
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5356
  }
5357

5358
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5359 5360
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5361
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5362
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5363 5364

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5365 5366 5367 5368 5369 5370 5371 5372 5373

_cleanup:
  tfree(*pExpr);
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
5374 5375

  return code;
5376 5377
}

H
hjxilinx 已提交
5378
/*
 * Build the expression tree of an arithmetic expression from its binary form, which is carried
 * by the first argument of the expression (arg[0]), and store it in pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS if the tree has been created; the code caught by the CATCH block if
 *         exprTreeFromBinary() raised one; TSDB_CODE_QRY_APP_ERROR if no tree was returned.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode *pRoot = NULL;

  TRY(TSDB_MAX_TAGS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5399
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5400 5401
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5402
  int32_t code = TSDB_CODE_SUCCESS;
5403

H
Haojun Liao 已提交
5404
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5405
  if (pExprs == NULL) {
5406
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5407 5408 5409 5410 5411
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5412
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5413
    pExprs[i].base = *pExprMsg[i];
5414
    pExprs[i].bytes = 0;
5415 5416 5417 5418

    int16_t type = 0;
    int16_t bytes = 0;

5419
    // parse the arithmetic expression
5420
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5421
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5422

5423 5424 5425
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5426 5427
      }

5428
      type  = TSDB_DATA_TYPE_DOUBLE;
5429
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5430
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5431 5432 5433
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5434
    } else{
5435
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5436
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5437

dengyihao's avatar
dengyihao 已提交
5438
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5439 5440 5441 5442
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5443
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5444

H
Haojun Liao 已提交
5445 5446 5447
        type  = s.type;
        bytes = s.bytes;
      }
5448 5449
    }

5450 5451
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5452
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5453
      tfree(pExprs);
5454
      return TSDB_CODE_QRY_INVALID_MSG;
5455 5456
    }

5457
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5458
      tagLen += pExprs[i].bytes;
5459
    }
5460
    assert(isValidDataType(pExprs[i].type));
5461 5462 5463
  }

  // TODO refactor
5464
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5465 5466
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5467

5468
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5469
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5470 5471 5472 5473 5474
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5475
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5476
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5477 5478 5479
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5480
  *pExprInfo = pExprs;
5481 5482 5483 5484

  return TSDB_CODE_SUCCESS;
}

5485
/*
 * Create the group-by descriptor for a query.
 *
 * pQueryMsg  query message; numOfGroupCols, orderType and orderByIdx are read
 * pColIndex  array of numOfGroupCols column indexes, copied into the result
 * code       out: set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 *            fails; left untouched otherwise
 *
 * Returns NULL when the query has no group-by columns (code untouched) or
 * when allocation fails (code set); otherwise a heap-allocated descriptor
 * the caller must release together with its columnInfo array.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure to the caller
    tfree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5509
/*
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo per column of
 * pQuery->colList that carries at least one filter, each with an array of
 * SColumnFilterElem whose fp callback is selected from the column type and
 * the lower/upper relational operators.
 *
 * pQInfo  only used as an identifier in log messages
 * pQuery  colList/numOfCols are read; numOfFilterCols and pFilterInfo are written
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 * fails, or TSDB_CODE_QRY_INVALID_MSG for an unusable filter description.
 * On failure, whatever was already attached to pQuery stays attached and is
 * expected to be released by the caller's cleanup of pQuery.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    // keep the count consistent with the (missing) array so cleanup code does not walk it
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->pFilters = calloc(pQuery->colList[i].numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: slots 1..4 of the range table cover the four open/closed combinations
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          // a single-sided condition must not carry a second operator
          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }
      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

5596
/*
 * For every non-arithmetic output expression, rewrite colInfo.colIndex so it
 * is the position of the referenced column inside pQuery->colList (normal
 * columns) or pQuery->tagColList (tag columns), matched by colId.
 *
 * A tag expression that is not found in tagColList is accepted only when it
 * refers to TSDB_TBNAME_COLUMN_INDEX; in that case colIndex is left as is.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5631

5632
/*
 * Three-way comparator ordering STableIdInfo records by ascending uid.
 * Returns 1, -1 or 0; passed to taosArraySort/taosArraySearch in this file.
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

dengyihao's avatar
dengyihao 已提交
5640 5641
static void freeQInfo(SQInfo *pQInfo);

weixin_48148422's avatar
weixin_48148422 已提交
5642
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5643
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
B
Bomin Zhang 已提交
5644 5645 5646
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

5647 5648
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
B
Bomin Zhang 已提交
5649
    goto _cleanup_qinfo;
5650
  }
B
Bomin Zhang 已提交
5651 5652 5653
  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;
5654 5655

  SQuery *pQuery = calloc(1, sizeof(SQuery));
B
Bomin Zhang 已提交
5656 5657 5658
  if (pQuery == NULL) {
    goto _cleanup_query;
  }
5659 5660
  pQInfo->runtimeEnv.pQuery = pQuery;

5661
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5662
  pQuery->numOfOutput     = numOfOutput;
5663 5664 5665
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5666
  pQuery->order.orderColId = pQueryMsg->orderColId;
5667 5668 5669 5670
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5671
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5672
  pQuery->fillType        = pQueryMsg->fillType;
5673
  pQuery->numOfTags       = pQueryMsg->numOfTags;
B
Bomin Zhang 已提交
5674
  pQuery->tagColList      = pTagCols;
5675
  
5676
  // todo do not allocate ??
5677
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5678
  if (pQuery->colList == NULL) {
5679
    goto _cleanup;
5680
  }
5681

H
hjxilinx 已提交
5682
  for (int16_t i = 0; i < numOfCols; ++i) {
5683
    pQuery->colList[i] = pQueryMsg->colList[i];
5684
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5685
  }
5686

5687
  // calculate the result row size
5688 5689 5690
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5691
  }
5692

5693
  doUpdateExprColumnIndex(pQuery);
5694

5695
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5696
  if (ret != TSDB_CODE_SUCCESS) {
5697
    goto _cleanup;
5698 5699 5700
  }

  // prepare the result buffer
5701
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5702
  if (pQuery->sdata == NULL) {
5703
    goto _cleanup;
5704 5705
  }

H
hjxilinx 已提交
5706
  // set the output buffer capacity
H
hjxilinx 已提交
5707
  pQuery->rec.capacity = 4096;
5708
  pQuery->rec.threshold = 4000;
5709

5710
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5711
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5712 5713

    // allocate additional memory for interResults that are usually larger then final results
5714 5715
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5716
    if (pQuery->sdata[col] == NULL) {
5717
      goto _cleanup;
5718 5719 5720
    }
  }

5721
  if (pQuery->fillType != TSDB_FILL_NONE) {
5722 5723
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5724
      goto _cleanup;
5725 5726 5727
    }

    // the first column is the timestamp
5728
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5729 5730
  }

dengyihao's avatar
dengyihao 已提交
5731 5732 5733 5734 5735 5736
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5737 5738 5739
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
  }
5740

weixin_48148422's avatar
weixin_48148422 已提交
5741 5742
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5743
  taosArraySort(pTableIdList, compareTableIdInfo);
5744

H
Haojun Liao 已提交
5745
  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
H
Haojun Liao 已提交
5746 5747 5748
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5749
  for(int32_t i = 0; i < numOfGroups; ++i) {
5750
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5751

H
Haojun Liao 已提交
5752
    size_t s = taosArrayGetSize(pa);
5753
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
B
Bomin Zhang 已提交
5754 5755 5756
    if (p1 == NULL) {
      goto _cleanup;
    }
5757

H
hjxilinx 已提交
5758
    for(int32_t j = 0; j < s; ++j) {
5759
      void* pTable = taosArrayGetP(pa, j);
H
Haojun Liao 已提交
5760
      STableId* id = TSDB_TABLEID(pTable);
5761

H
Haojun Liao 已提交
5762
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5763 5764 5765
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5766
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5767
      }
5768

H
Haojun Liao 已提交
5769 5770
      void* buf = pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, pTable, window, buf);
B
Bomin Zhang 已提交
5771 5772 5773
      if (item == NULL) {
        goto _cleanup;
      }
5774
      item->groupIndex = i;
H
hjxilinx 已提交
5775
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5776 5777
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5778
    }
5779

5780
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5781
  }
5782

weixin_48148422's avatar
weixin_48148422 已提交
5783 5784
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5785
  pQuery->pos = -1;
5786
  pQuery->window = pQueryMsg->window;
5787

5788
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
5789 5790
    int32_t code = TAOS_SYSTEM_ERROR(errno);
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
5791
    goto _cleanup;
5792
  }
5793

5794
  colIdCheck(pQuery);
5795

5796
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5797 5798
  return pQInfo;

B
Bomin Zhang 已提交
5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813
_cleanup_qinfo:
  tsdbDestoryTableGroup(pTableGroupInfo);

_cleanup_query:
  taosArrayDestroy(pGroupbyExpr->columnInfo);
  tfree(pGroupbyExpr);
  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }
  tfree(pExprs);

5814
_cleanup:
dengyihao's avatar
dengyihao 已提交
5815
  freeQInfo(pQInfo);
5816 5817 5818
  return NULL;
}

H
hjxilinx 已提交
5819
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5820 5821 5822 5823
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5824

H
hjxilinx 已提交
5825 5826 5827 5828
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5829
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5830 5831 5832
  return (sig == (uint64_t)pQInfo);
}

H
Haojun Liao 已提交
5833
/*
 * Second-stage initialisation of a SQInfo created by createQInfoImpl.
 *
 * pQueryMsg  query message; the ts-comp payload fields are read
 * tsdb, vgId, isSTable  forwarded to doInitQInfo
 * pQInfo     query to initialise
 * param      stored in pQInfo->param
 *
 * When the query time window is empty for the scan order, or no table
 * qualifies, the query is marked QUERY_COMPLETED, dataReady is posted and
 * TSDB_CODE_SUCCESS is returned without further setup. If doInitQInfo fails,
 * pQInfo is released with freeQInfo and the error code is returned; the
 * caller must not use pQInfo afterwards.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  pQInfo->param = param;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // Build the ts buffer only once it is certain to be handed to doInitQInfo;
  // creating it before the early returns above would leave it unreferenced.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891
/*
 * Release an array of numOfFilters column filters: for every entry flagged
 * filterstr the buffer addressed by pz is freed, then the array itself.
 * A NULL array is ignored.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
    if (pFilter != NULL) {
      int32_t idx = 0;
      while (idx < numOfFilters) {
        if (pFilter[idx].filterstr) {
          free((void*)(pFilter[idx].pz));
        }
        ++idx;
      }

      free(pFilter);
    }
}

H
hjxilinx 已提交
5892 5893 5894 5895
/*
 * Release a SQInfo and everything reachable from it: result pages, filter
 * descriptors, output expressions (including arithmetic expression trees),
 * fill values, per-table query info, table groups, group-by descriptor, tag
 * and column lists, the SQuery and finally the SQInfo itself.
 *
 * The call is a no-op when pQInfo fails isValidQInfo. Because this function
 * is also the cleanup path for partially constructed objects, a missing
 * SQuery and missing sdata/pFilterInfo arrays are tolerated.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  // sdata is allocated late during construction and may be absent
  if (pQuery != NULL && pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      tfree(pQuery->sdata[col]);
    }
  }

  sem_destroy(&(pQInfo->dataReady));

  if (pQuery != NULL) {
    // NOTE(review): skipped when no SQuery is attached, assuming the runtime
    // environment holds nothing to release in that state -- confirm.
    teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          tfree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      tfree(pQuery->pSelectExpr);
    }

    tfree(pQuery->fillVal);
  }

  // todo refactor, extract method to destroytableDataInfo
  if (pQInfo->tableqinfoGroupInfo.pGroupList != NULL) {
    int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
    for (int32_t i = 0; i < numOfGroups; ++i) {
      SArray *p = GET_TABLEGROUP(pQInfo, i);

      size_t num = taosArrayGetSize(p);
      for(int32_t j = 0; j < num; ++j) {
        STableQueryInfo* item = taosArrayGetP(p, j);
        destroyTableQueryInfo(item);
      }

      taosArrayDestroy(p);
    }
  }

  tfree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestoryTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery != NULL) {
    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo* column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      tfree(pQuery->colList);
    }

    tfree(pQuery->sdata);

    tfree(pQuery);
  }

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5980
/*
 * Compute the number of payload bytes to send for the current result set.
 *
 * For an ordinary query this is rowSize * (*numOfRows). For a ts-comp query
 * with at least one row, the result lives in the file named by
 * sdata[0]->data: its size is returned and also written to *numOfRows.
 * Returns 0 when that file cannot be stat'ed.
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6001

H
hjxilinx 已提交
6002 6003 6004
/*
 * Copy the current batch of results into the response buffer `data` and
 * update the running totals / completion status of the query.
 *
 * For a ts-comp query the result is the content of the temporary file named
 * by sdata[0]->data: the file is read into `data`, closed and unlinked.
 * Otherwise doCopyQueryResultToMsg copies rec.rows rows.
 *
 * Always returns TSDB_CODE_SUCCESS; I/O problems are only logged.
 * NOTE(review): `data` is assumed to be large enough for the whole file --
 * confirm against the caller that sizes the response.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the full offset width and check it: a failed lseek must not turn into a read length
      int64_t s = lseek(fd, 0, SEEK_END);
      qDebug("QInfo:%p ts comp data return, file:%s, size:%" PRId64, pQInfo, pQuery->sdata[0]->data, s);

      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        char   *dst = data;
        int64_t remain = s;

        // read() may deliver fewer bytes than requested, so loop until done
        while (remain > 0) {
          int64_t n = read(fd, dst, (size_t)remain);
          if (n < 0 && errno == EINTR) {
            continue;
          }

          if (n <= 0) {
            qError("QInfo:%p failed to read ts-comp data, path:%s, remain:%" PRId64 ", reason:%s", pQInfo,
                   pQuery->sdata[0]->data, remain, (n < 0) ? strerror(errno) : "unexpected end of file");
            break;
          }

          dst += n;
          remain -= n;
        }
      } else {
        // todo handle error
        qError("QInfo:%p failed to seek in ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6052 6053 6054 6055 6056 6057 6058
/*
 * Bookkeeping object for the query handles of one vnode.
 * NOTE(review): field roles below other than qinfoPool are inferred from
 * their names and types only -- confirm against the code that uses them.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the owning vnode group
  bool            closed;         // presumably set once the manager stops accepting handles
  pthread_mutex_t lock;           // presumably guards the fields above
} SQueryMgmt;

H
Haojun Liao 已提交
6059
/*
 * Create a query handle from a raw query message.
 *
 * tsdb       storage engine handle used to resolve the tables to query
 * vgId       forwarded to initQInfo
 * pQueryMsg  query message; decoded in place by convertQueryMsg
 * param      opaque pointer stored in the created handle
 * pQInfo     out: the created handle on success, NULL on any failure
 *
 * Returns TSDB_CODE_SUCCESS or an error code. All intermediate objects
 * decoded from the message are released before returning; objects handed to
 * createQInfoImpl are owned by it from that point on. The filter lists of
 * pQueryMsg->colList are released in every case.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;
  SExprInfo     *pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to QueryStable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }
  } else {
    // the query type comes from the message: reject it instead of continuing with an empty table group
    qError("qmsg:%p unsupported query type", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // ownership moved into createQInfoImpl, whether it succeeded or not
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // pExprs is only non-NULL here when it was built but never handed over;
  // arithmetic expressions own a tree that must go with the array
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }
    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6179
/**
 * Destroy a query handle.
 *
 * When the handle passes isValidQInfo(), the query cost summary is printed and
 * the SQInfo object is released through freeQInfo(); otherwise nothing is done.
 *
 * @param qHandle  opaque query handle (an SQInfo pointer)
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pInfo)) {
    qDebug("QInfo:%p query completed", pInfo);
    queryCostStatis(pInfo);   // print the query cost summary
    freeQInfo(pInfo);
  }
}

6190
/**
 * Execute one round of the query bound to the given handle.
 *
 * A handle that is NULL, or whose signature no longer points back at itself, is
 * treated as already freed and ignored. On every other path the dataReady
 * semaphore is posted exactly once before returning, whether the query was
 * killed, had no tables, aborted with an error, or ran normally.
 *
 * @param qinfo  opaque query handle (an SQInfo pointer)
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo == NULL || pInfo->signature != pInfo) {
    qDebug("QInfo:%p has been freed, no need to execute", pInfo);
    return;
  }

  SQueryRuntimeEnv *pEnv = &pInfo->runtimeEnv;

  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pInfo);
    goto _notify;
  }

  if (pInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pInfo);
    goto _notify;
  }

  // control comes back here with a non-zero value when longjmp() is invoked on
  // this jump buffer; that value is recorded as the error code of the query
  int32_t jmpCode = setjmp(pEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pInfo, tstrerror(pInfo->code));
    goto _notify;
  }

  qDebug("QInfo:%p query task is launched", pInfo);

  // dispatch to the matching executor: tag-only, super table, or single table
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pInfo);   // todo support the limit/offset
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pInfo);
  } else {
    tableQueryImpl(pInfo);
  }

  SQuery *pQuery = pEnv->pQuery;
  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p query is killed", pInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %zu tables queried, %"PRId64" rows are returned", pInfo, pInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

_notify:
  sem_post(&pInfo->dataReady);
}

H
hjxilinx 已提交
6244
/**
 * Wait for the current round of the query to produce its result and report the
 * query status code.
 *
 * If the query has been killed the function returns immediately without
 * waiting; otherwise it blocks on the dataReady semaphore first.
 *
 * @param qinfo  opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 *         the code stored in the SQInfo object
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;
  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQuery = pInfo->runtimeEnv.pQuery;

  if (!IS_QUERY_KILLED(pInfo)) {
    sem_wait(&pInfo->dataReady);
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pInfo, pQuery->rowSize, pQuery->rec.rows,
           pInfo->code);
  } else {
    qDebug("QInfo:%p query is killed, code:%d", pInfo, pInfo->code);
  }

  return pInfo->code;
}
6263

H
hjxilinx 已提交
6264
/**
 * Tell whether the client still has results to fetch from this query.
 *
 * @param qinfo  opaque query handle (an SQInfo pointer)
 * @return false when the handle is NULL/invalid, when the query carries an
 *         error code, or when its status contains QUERY_OVER; true when the
 *         status contains QUERY_RESBUF_FULL or QUERY_COMPLETED
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // an invalid handle must not be dereferenced, not even for the log message,
  // so report the invalid-handle code instead of reading pQInfo->code
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo, TSDB_CODE_QRY_INVALID_QHANDLE);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qDebug("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool ret = false;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    ret = true;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);  // no other status is expected at this point
  }

  if (ret) {
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6291 6292 6293
/**
 * Build the retrieve response message for the current result set.
 *
 * The response buffer is obtained from rpcMallocCont() and handed to the caller
 * through *pRsp; its length is stored in *contLen.
 *
 * @param qinfo    opaque query handle (an SQInfo pointer)
 * @param pRsp     out: the allocated response, or NULL if allocation failed
 * @param contLen  out: length in bytes of the response, 0 if allocation failed
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the response cannot be allocated,
 *         otherwise the query code or the result of doDumpQueryResult()
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // payload = result data + one int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t  size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp));

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    // do not touch the response when the allocation failed
    *contLen = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // nothing to return (or the query failed): mark the query as finished
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6332

H
Haojun Liao 已提交
6333
/**
 * Mark a query as killed.
 *
 * The dataReady semaphore is posted first, then the killed flag is set through
 * setQueryKilled().
 *
 * @param qinfo  opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_INVALID_QHANDLE for a
 *         NULL/invalid handle
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo != NULL && isValidQInfo(pInfo)) {
    sem_post(&pInfo->dataReady);
    setQueryKilled(pInfo);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

H
hjxilinx 已提交
6345 6346 6347
/**
 * Produce the result of a query that only touches tags / table names.
 *
 * Three shapes are handled, selected by the function id of the first output
 * expression:
 *   - TSDB_FUNC_TID_TAG: one var-length column per table holding uid, tid, vgId
 *     followed by the table name or the tag value;
 *   - TSDB_FUNC_COUNT:   a single row holding the number of tables;
 *   - anything else:     one row per table with the requested tag values and/or
 *     the table name.
 *
 * Tables are consumed starting at pQInfo->tableIndex, at most rec.capacity per
 * call; the number of rows produced is stored in pQuery->rec.rows and the query
 * status is set to QUERY_COMPLETED.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // the width/type of the tag value come from the tag column definition when
    // the column is found there, otherwise from the output expression itself
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      // rows are laid out by their position in this batch (count), not by the
      // global table index: the buffer only holds rec.capacity rows and the
      // first rec.rows rows are what gets returned
      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // output is an arbitrary byte offset into the result buffer, so the
      // values are copied instead of being stored through a cast pointer
      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char *data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char *val = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
        } else {
          if (val == NULL) {
            setNull(output, type, bytes);
          } else {  // todo here stop will cause client crash
            memcpy(output, val, bytes);
          }
        }
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          char* data = tsdbGetTableName(item->pTable);
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
          memcpy(dst, data, varDataTLen(data));
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;

          char* data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;

          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
          } else {
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
          }
        }
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

6479 6480 6481 6482 6483 6484 6485
/**
 * Release callback for a cached query handle.
 *
 * @param qhandle  address of a slot holding the query handle; the query is
 *                 killed and then destroyed unless the slot or its content is NULL
 */
void freeqinfoFn(void *qhandle) {
  void** ppInfo = qhandle;

  if (ppInfo != NULL && *ppInfo != NULL) {
    qKillQuery(*ppInfo);
    qDestroyQueryInfo(*ppInfo);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
  const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

  SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt));

  pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryHandle->closed    = false;
  pthread_mutex_init(&pQueryHandle->lock, NULL);

  qDebug("vgId:%d, open querymgmt success", vgId);
  return pQueryHandle;
}

H
Haojun Liao 已提交
6505 6506 6507 6508 6509
/**
 * Cache traversal callback that kills the query stored in a cache entry.
 *
 * qRegisterQInfo() stores the SQInfo pointer value itself as the cache payload,
 * so the payload address has to be dereferenced to obtain the query handle, the
 * same way freeqinfoFn() does.
 * NOTE(review): assumes taosCacheRefresh() passes the payload address to the
 * callback, like the release callback of the cache - confirm against tcache.
 *
 * @param handle  address of the cache payload (a slot holding the query handle)
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** ppQInfo = (void**)handle;
  if (ppQInfo == NULL || *ppQInfo == NULL) {
    return;
  }

  qKillQuery(*ppQInfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

  pthread_mutex_lock(&pQueryMgmt->lock);
  pQueryMgmt->closed = true;
  pthread_mutex_unlock(&pQueryMgmt->lock);

H
Haojun Liao 已提交
6521
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543
}

/**
 * Destroy a query manager: its cache, its lock and the manager object itself.
 *
 * The manager is expected to be marked closed already (asserted). A NULL
 * manager is ignored.
 *
 * @param pQMgmt  query manager created by qOpenQueryMgmt()
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;

  if (pMgmt != NULL) {
    int32_t ownerVgId = pMgmt->vgId;  // kept for the log, the manager is freed below

    assert(pMgmt->closed);

    // detach the pool from the manager before cleaning it up
    SCacheObj* pPool = pMgmt->qinfoPool;
    pMgmt->qinfoPool = NULL;

    taosCacheCleanup(pPool);
    pthread_mutex_destroy(&pMgmt->lock);
    tfree(pMgmt);

    qDebug("vgId:%d querymgmt cleanup completed", ownerVgId);
  }
}

6544
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6545 6546 6547 6548
  if (pMgmt == NULL) {
    return NULL;
  }

6549 6550
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2;

6551 6552
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
6553
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
6554 6555 6556 6557 6558 6559
    return NULL;
  }

  pthread_mutex_lock(&pQueryMgmt->lock);
  if (pQueryMgmt->closed) {
    pthread_mutex_unlock(&pQueryMgmt->lock);
6560
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
6561 6562
    return NULL;
  } else {
6563 6564 6565
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
6566 6567 6568 6569 6570 6571
    pthread_mutex_unlock(&pQueryMgmt->lock);

    return handle;
  }
}

6572
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6573 6574 6575 6576 6577 6578
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6579
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

/**
 * Hand a query handle slot back to the cache of the query manager.
 *
 * @param pMgmt     query manager created by qOpenQueryMgmt()
 * @param pQInfo    cache slot obtained from qRegisterQInfo()/qAcquireQInfo()
 * @param needFree  forwarded to taosCacheRelease()
 * @return always NULL
 */
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
  // same guard as qRegisterQInfo(): nothing to release without a manager
  if (pMgmt == NULL) {
    return NULL;
  }

  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, needFree);
  return NULL;
}