qExecutor.c 221.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
16 17
#include "tcache.h"
#include "tglobal.h"
H
Haojun Liao 已提交
18
#include "qfill.h"
19
#include "taosmsg.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
23 24
#include "qExecutor.h"
#include "qUtil.h"
25
#include "qresultBuf.h"
H
hjxilinx 已提交
26
#include "query.h"
S
slguan 已提交
27
#include "queryLog.h"
H
Haojun Liao 已提交
28 29
#include "qast.h"
#include "tfile.h"
30 31 32
#include "tlosertree.h"
#include "tscompression.h"
#include "ttime.h"
33 34 35 36 37 38 39 40 41

/* true when any bit of mask is set in status */
#define Q_STATUS_EQUAL(status, mask) (((status) & (mask)) != 0)

/* true when the column flag carries the TSDB_COL_TAG bit */
#define TSDB_COL_IS_TAG(flag) (((flag) & TSDB_COL_TAG) != 0)

/* true when the scan direction derived from the query order is the ascending step */
#define QUERY_IS_ASC_QUERY(query) \
  (GET_FORWARD_DIRECTION_FACTOR((query)->order.order) == QUERY_ASC_FORWARD_STEP)

/* test / set the scan flag kept in the runtime environment */
#define IS_MASTER_SCAN(env)        ((env)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(env)       ((env)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(env)  ((env)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(env) ((env)->scanFlag = REVERSE_SCAN)

/* recover the enclosing SQInfo from the address of its embedded runtimeEnv member */
#define GET_QINFO_ADDR(pEnv) ((SQInfo *)((char *)(pEnv) - offsetof(SQInfo, runtimeEnv)))

/* row position reached after moving i steps of the given stride away from (cursor)->pos */
#define GET_COL_DATA_POS(cursor, i, stride) ((cursor)->pos + (i) * (stride))

/* flip ord in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC; note that ord is evaluated twice */
#define SWITCH_ORDER(ord) (((ord) = ((ord) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

/* width in bytes / data type of the source column referenced by the exprIdx-th output expression */
#define GET_COLUMN_BYTES(q, exprIdx) \
  ((q)->colList[(q)->pSelectExpr[exprIdx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(q, exprIdx) \
  ((q)->colList[(q)->pSelectExpr[exprIdx].base.colInfo.colIndex].type)
58

59
/*
 * Query execution status flags. Every value occupies its own bit, so several
 * flags can be OR-ed together in a single status word.
 */
enum {
  /* set when the query starts to execute */
  QUERY_NOT_COMPLETED = (1u << 0),

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = (1u << 1),

  /*
   * The query is over:
   * 1. used by queries producing a one-row result, e.g. count/sum/first/last/avg, etc.
   * 2. also set when all data within the queried time window has been handled
   */
  QUERY_COMPLETED = (1u << 2),

  /*
   * Used when the result has not been completely returned to the client yet;
   * usually seen with interval queries that use the interpolation option.
   */
  QUERY_OVER = (1u << 3),
};
79 80

/* possible outcomes of a timestamp-join comparison; values are consecutive starting at 0 */
enum {
  TS_JOIN_TS_EQUAL = 0,    /* 0 */
  TS_JOIN_TS_NOT_EQUALS,   /* 1 */
  TS_JOIN_TAG_NOT_EQUALS,  /* 2 */
};

86
typedef struct {
87 88 89 90 91 92
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
93 94
} SQueryStatusInfo;

H
Haojun Liao 已提交
95
#if 0
/*
 * Disabled allocation-failure injection: when enabled, malloc/calloc in the
 * rest of this file are redirected to wrappers that return NULL whenever
 * rand() % 5 is 0 or 1, and forward to the real allocator otherwise.
 */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  const uint32_t dice = rand();
  return (dice % 5 <= 1) ? NULL : malloc(__size);
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  const uint32_t dice = rand();
  return (dice % 5 <= 1) ? NULL : calloc(num, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#endif
H
Haojun Liao 已提交
117

118
/* clear the given status bits in (query)->status */
#define CLEAR_QUERY_STATUS(query, bits)  ((query)->status &= (~(bits)))

/* number of entries in, and idx-th entry of, the table group list kept in tableqinfoGroupInfo */
#define GET_NUM_OF_TABLEGROUP(qinfo)     taosArrayGetSize((qinfo)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(qinfo, idx)       ((SArray*) taosArrayGetP((qinfo)->tableqinfoGroupInfo.pGroupList, (idx)))

122
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
123
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
124

H
Haojun Liao 已提交
125
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
126

H
Haojun Liao 已提交
127 128 129 130 131 132 133 134
// previous time window may not be of the same size of pQuery->intervalTime
#define GET_NEXT_TIMEWINDOW(_q, tw)                                   \
  do {                                                                \
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR((_q)->order.order); \
    (tw)->skey += ((_q)->slidingTime * factor);                       \
    (tw)->ekey = (tw)->skey + ((_q)->intervalTime - 1);               \
  } while (0)

H
hjxilinx 已提交
135
// todo move to utility
136
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
137

H
hjxilinx 已提交
138
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
139
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
140 141
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
142

143 144 145
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

146
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
147
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
148 149
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
150
static void buildTagQueryResult(SQInfo *pQInfo);
151

152
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
153
static int32_t flushFromResultBuf(SQInfo *pQInfo);
154

155
/*
 * Check the row at elemPos against the column filters of the query.
 *
 * For every filtered column the value must be non-NULL and accepted by at
 * least one of that column's filter elements; the row qualifies only when
 * every filtered column passes.
 *
 * @param pQuery  query holding pFilterInfo / numOfFilterCols
 * @param elemPos row index inside the per-column data buffers (pData)
 * @return true when the row satisfies all column filters
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = pColFilter->pData + pColFilter->info.bytes * elemPos;

    // a NULL value never qualifies
    if (isNull(pValue, pColFilter->info.type)) {
      return false;
    }

    // the filter elements of one column are alternatives: stop at the first one that accepts
    bool accepted = false;
    for (int32_t f = 0; !accepted && f < pColFilter->numOfFilters; ++f) {
      SColumnFilterElem *pCond = &pColFilter->pFilters[f];
      accepted = pCond->fp(pCond, pValue, pValue) ? true : false;
    }

    if (!accepted) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output function contexts.
 *
 * When the query has a main output (hasMainOutput), the ts, tag and tagprj
 * functions are skipped because they cannot decide the number of output rows;
 * the number of results is then decided by the main output only.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);
  int64_t numOfRows = 0;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pSelectExpr[k].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    if (pInfo == NULL) {
      continue;
    }

    if (numOfRows < pInfo->numOfRes) {
      numOfRows = pInfo->numOfRes;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

209 210 211 212 213 214 215 216 217
/*
 * The number of results has to be updated because the offset value was updated.
 *
 * Overwrites numOfRes of every output context with the given value, except
 * for the ts, tag, tagprj and ts_dummy functions, which are left untouched.
 * The new value is asserted to be smaller than the current one.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    int16_t      fid = pRuntimeEnv->pCtx[k].functionId;

    bool untouched = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ ||
                      fid == TSDB_FUNC_TS_DUMMY);
    if (!untouched) {
      assert(pInfo->numOfRes > numOfRes);
      pInfo->numOfRes = numOfRes;
    }
  }
}

229 230 231 232 233 234 235 236 237
/* Map a group index to its result id: ids start at 200000 and are spaced 10000 apart. */
static int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t firstId = 200000;
  const int32_t spacing = 10000;

  return firstId + (groupIndex * spacing);
}

/*
 * Tell whether the group-by clause contains a normal (TSDB_COL_NORMAL) column.
 *
 * @param pGroupbyExpr group-by description, may be NULL
 * @return true when at least one group-by column is flagged TSDB_COL_NORMAL
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, k);
    if (pCol->flag != TSDB_COL_NORMAL) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pCol->colIndex > 0);
    }

    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by clause.
 *
 * The column id of the first TSDB_COL_NORMAL group-by column is looked up in
 * pQuery->colList; TSDB_DATA_TYPE_NULL is returned when no normal column
 * exists or when its id is not found in colList.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 acts as the "no normal column" marker (presumably never a valid column id)
  int32_t wantedColId = -2;

  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, k);
    if (pCol->flag == TSDB_COL_NORMAL) {
      wantedColId = pCol->colId;
      break;
    }
  }

  for (int32_t k = 0; k < pQuery->numOfCols; ++k) {
    if (wantedColId == pQuery->colList[k].colId) {
      return pQuery->colList[k].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * A "selectivity with tags" query has at least one output function flagged
 * TSDB_FUNCSTATE_SELECTIVITY together with at least one tag_dummy / ts_dummy
 * output column.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    dummyColFound = false;
  int32_t selectivityFuncs = 0;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pSelectExpr[k].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      dummyColFound = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityFuncs += 1;
    }
  }

  return (selectivityFuncs > 0) && dummyColFound;
}

303
/* A ts-comp query is recognised by its first output expression being TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
304

305 306 307 308
/*
 * Apply the LIMIT clause to the rows produced so far.
 *
 * When a positive limit is set and total + rows exceeds it, rec.rows is cut
 * down so that the limit is met exactly, and the query is marked
 * QUERY_COMPLETED.
 *
 * @return true when the result was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit, or the limit is not exceeded yet
  if ((pQuery->limit.limit <= 0) || (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit)) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);

  assert(pQuery->rec.rows >= 0);
  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* true when any output expression of the query is a top or bottom function */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t k = 0;

  while (k < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[k].base.functionId;

    // any other function, including the ts column, is simply skipped
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      return true;
    }

    k += 1;
  }

  return false;
}

H
Haojun Liao 已提交
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
/*
 * Tell whether the query output involves tag values: either the query
 * consists of a single ts_comp expression, or at least one output expression
 * refers to a column flagged as tag.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];

  // ts_comp column required the tag value for join filter
  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // look for a tag column, by which the results are aggregated
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[k].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

355 356 357 358 359 360 361 362
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
363 364
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
  if (TSDB_COL_IS_TAG(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
365 366
    return false;
  }
367

368
  if (pStatis != NULL) {
H
Haojun Liao 已提交
369 370
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
371 372
  } else {
    *pColStatis = NULL;
373
  }
374

375 376 377
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
378

379 380 381 382
  return true;
}

/*
 * Find the window result registered under the key (pData, bytes) and make it
 * the current one. During the master scan a missing key gets a new result
 * slot, growing the result array when it is full.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pWindowResInfo  window result set to search / extend
 * @param pData           key bytes
 * @param bytes           key length
 * @param masterscan      when false, a missing key is not created
 * @return the window result of the key, or NULL when the key is unknown and masterscan is false
 *
 * If the result array cannot be enlarged, the function does not return: it
 * jumps to pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY. The previous
 * array is left untouched by the failed realloc, so pWindowResInfo still
 * describes valid memory for the cleanup path.
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
    return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
  }

  if (!masterscan) {
    return NULL;
  }

  // more than the capacity, reallocate the resources
  if (pWindowResInfo->size >= pWindowResInfo->capacity) {
    int64_t oldCap = pWindowResInfo->capacity;
    int64_t newCap = oldCap * 2;  // computed in 64 bits so the doubling itself cannot overflow

    char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
    if (t == NULL) {
      // continuing here would initialise and index slots beyond the old array
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pWindowResInfo->pResult = (SWindowResult *)t;
    memset(&pWindowResInfo->pResult[oldCap], 0, sizeof(SWindowResult) * (newCap - oldCap));

    for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
      SPosInfo pos = {-1, -1};
      createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos, pRuntimeEnv->interBufSize);
    }

    pWindowResInfo->capacity = newCap;
  }

  // add a new result set for a new group
  pWindowResInfo->curIndex = pWindowResInfo->size++;
  taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The search starts from the current window of pWindowResInfo (or, when no
 * window is active yet, from the one beginning at prevSKey) and shifts the
 * start key in whole multiples of slidingTime until ts falls inside
 * [skey, skey + intervalTime - 1].
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  const int64_t interval = pQuery->intervalTime;
  const int64_t sliding = pQuery->slidingTime;

  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    int32_t slot = curTimeWindow(pWindowResInfo);
    win = getWindowResult(pWindowResInfo, slot)->window;
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + interval - 1;
  }

  const bool covered = (win.skey <= ts) && (ts <= win.ekey);
  if (!covered) {
    int64_t start = win.skey;

    // window begins after ts: step backwards by whole sliding units
    if (start > ts) {
      start -= ((start - ts + sliding - 1) / sliding) * sliding;
    }

    // window ends before ts: step forwards by whole sliding units
    int64_t end = start + interval - 1;
    if (end < ts) {
      start += ((ts - end + sliding - 1) / sliding) * sliding;
    }

    win.skey = start;
    win.ekey = win.skey + interval - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (win.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);

  return win;
}

/*
 * Reserve one row in the result buffer of group sid for a window result that
 * has no position yet (pos.pageId == -1).
 *
 * The last page of the group is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise a new page is requested.
 *
 * @return 0 on success or when a position was already assigned, -1 when no page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  // position already assigned, nothing to do
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pages = getDataBufPagesIdList(pResultBuf, sid);

  if (pages.size == 0) {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    pageId = getLastPageId(&pages);
    pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    if (pPage->num >= numOfRowsPerPage) {  // last page is full
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  }

  if (pPage == NULL) {
    return -1;
  }

  // take the next free row of the page and bump the page's row count
  pWindowRes->pos.pageId = pageId;
  pWindowRes->pos.rowId = pPage->num++;

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
501
                                       STimeWindow *win, bool masterscan, bool* newWind) {
502 503
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
504

505 506
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
507
  if (pWindowRes == NULL) {
508 509 510
    *newWind = false;

    return masterscan? -1:0;
511
  }
512

513
  *newWind = true;
514 515 516 517 518 519 520
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }
521

522 523
  // set time window for current result
  pWindowRes->window = *win;
524

H
Haojun Liao 已提交
525
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
526 527 528 529 530 531 532 533
  return TSDB_CODE_SUCCESS;
}

/* Return the status record of the window result stored at slot; slot must lie in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
  return &pRes->status;
}

H
Haojun Liao 已提交
534
/*
 * Count how many rows, starting at pos and moving in scan direction, belong
 * to the time window that ends at ekey.
 *
 * @param numOfRows number of timestamps in pData
 * @param searchFn  search routine used to locate ekey
 * @param ekey      end key of the time window
 * @param pos       position of the first row of the window
 * @param order     TSDB_ORDER_ASC, anything else is handled as descending
 * @param pData     timestamp column of the block
 * @return number of rows to move forward; asserted to be positive
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order != TSDB_ORDER_ASC) {
    // descending: search the rows [0, pos]; the result is an absolute row index
    int32_t hit = searchFn((char *)pData, pos + 1, ekey, order);
    if (hit >= 0) {
      steps = (pData[hit] == ekey) ? (pos - hit + 1) : (pos - hit);
    }
  } else {
    // ascending: search the rows [pos, numOfRows); the result is relative to pos
    int32_t hit = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (hit >= 0) {
      steps = (pData[hit + pos] == ekey) ? (hit + 1) : hit;
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
565
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
566
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
567
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
568
    return pWindowResInfo->size;
569
  }
570

571
  // no qualified results exist, abort check
572 573
  int32_t numOfClosed = 0;
  
574
  if (pWindowResInfo->size == 0) {
575
    return pWindowResInfo->size;
576
  }
577

578
  // query completed
H
hjxilinx 已提交
579 580
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
581
    closeAllTimeWindow(pWindowResInfo);
582

583 584 585 586
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
587
    int64_t skey = TSKEY_INITIAL_VAL;
588

589 590 591
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
592
        numOfClosed += 1;
593 594
        continue;
      }
595

596 597 598 599 600 601 602 603
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
604

605
    // all windows are closed, set the last one to be the skey
606
    if (skey == TSKEY_INITIAL_VAL) {
607 608 609 610 611
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
612

613
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
614

615 616
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
617
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
618 619
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
620
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
621
    } else {
622
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
623
             numOfClosed);
624 625
    }
  }
626 627 628 629 630 631 632
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
633
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
634
  return numOfClosed;
635 636 637
}

/*
 * Count the rows of the current block, starting at startPos and moving in
 * scan direction, that fall into the time window ending at ekey.
 *
 * When the window ends inside the block the count comes from
 * getForwardStepsInBlock; otherwise every remaining row of the block counts.
 * With updateLastKey set, lastKey of the current table (pQuery->current) is
 * moved one step past the last counted row.
 *
 * @return number of rows in the window; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t numOfRows = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo *pTable = pQuery->current;

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);
  const bool endsInsideBlock = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  if (endsInsideBlock) {
    numOfRows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);

    if (numOfRows == 0) {  // no qualified data in current block, do not update the lastKey value
      assert(asc ? (ekey < pPrimaryColumn[startPos]) : (ekey > pPrimaryColumn[startPos]));
    } else if (updateLastKey) {
      int32_t lastRow = asc ? (startPos + (numOfRows - 1)) : (startPos - (numOfRows - 1));
      pTable->lastKey = pPrimaryColumn[lastRow] + step;
    }
  } else {
    // the window reaches the block border: all remaining rows in scan direction belong to it
    numOfRows = asc ? (pDataBlockInfo->rows - startPos) : (startPos + 1);

    if (updateLastKey) {
      pTable->lastKey = (asc ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey) + step;
    }
  }

  assert(numOfRows > 0);
  return numOfRows;
}

/*
 * Run every output function of the query once over a range of rows of the
 * current block (block-wise execution through aAggs[].xFunction).
 *
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * @param pWin         time window the rows belong to; its skey becomes nStartQueryTimestamp
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  // the lowest row index of the range, no matter which direction is scanned
  const int32_t lowestRow = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);
  const bool    wholeBlock = (forwardStep == numOfTotal);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pCtxList[k];
    int32_t         fid = pQuery->pSelectExpr[k].base.functionId;

    pCtx->nStartQueryTimestamp = pWin->skey;
    pCtx->size = forwardStep;
    pCtx->startOffset = lowestRow;

    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCtx->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (!wholeBlock) {
      pCtx->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pCtx, fid)) {
      aAggs[fid].xFunction(pCtx);
    }
  }
}

/*
 * Run every output function of the query on the single row at offset
 * (row-wise execution through aAggs[].xFunctionF).
 *
 * Nothing is done unless this is the master scan or the window is closed.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pCtxList[k];
    pCtx->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pSelectExpr[k].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pCtx, fid)) {
      aAggs[fid].xFunctionF(pCtx, offset);
    }
  }
}

H
Haojun Liao 已提交
731 732
/*
 * Advance pNext to the following time window and find the row of the current
 * block where that window starts.
 *
 * @param pNext         in: current window; out: next window that covers data of this block
 * @param primaryKeys   timestamp column of the block
 * @param searchFn      search routine used to locate the start key
 * @param prevPosition  last row of the previous window, or -1 when unknown
 * @return row index at which the next window starts, or -1 when the next window lies outside the block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  const bool outsideBlock =
      asc ? (pNext->skey > pDataBlockInfo->window.ekey) : (pNext->ekey < pDataBlockInfo->window.skey);
  if (outsideBlock) {
    return -1;
  }

  // key to look for: the leading edge of the window, bounded by the start key of the query range
  TSKEY searchKey = asc ? pNext->skey : pNext->ekey;
  if (asc) {
    if (searchKey < pQuery->window.skey) {
      searchKey = pQuery->window.skey;
    }
  } else {
    if (searchKey > pQuery->window.skey) {
      searchKey = pQuery->window.skey;
    }
  }

  int32_t firstRow = 0;
  const bool tumbling = (pQuery->slidingTime == pQuery->intervalTime);

  // tumbling time window query, a special case of sliding time window query
  if (tumbling && prevPosition != -1) {
    firstRow = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    firstRow = searchFn((char *)primaryKeys, pDataBlockInfo->rows, searchKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  const TSKEY rowKey = primaryKeys[firstRow];

  if (asc && rowKey > pNext->ekey) {
    // jump forward in whole sliding steps until the window reaches the row
    pNext->ekey += ((rowKey - pNext->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNext->skey = pNext->ekey - pQuery->intervalTime + 1;
  } else if ((!asc) && rowKey < pNext->skey) {
    // jump backward in whole sliding steps until the window reaches the row
    pNext->skey -= ((pNext->skey - rowKey + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNext->ekey = pNext->skey + pQuery->intervalTime - 1;
  }

  return firstRow;
}

H
Haojun Liao 已提交
784
/*
 * Return the key at which a time window ends in scan direction, bounded by
 * the end key of the query range: the smaller of pWindow->ekey and the query
 * ekey for ascending queries, the larger of pWindow->skey and the query ekey
 * for descending ones.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
801 802 803 804 805 806 807 808 809 810 811 812 813 814 815
/*
 * Linear lookup of the data buffer of column colId among the SColumnInfoData
 * entries of pDataBlock; NULL when the column is not present.
 */
//todo binary search
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  int32_t k = 0;

  while (k < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, k);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }

    k += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
816
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
817 818 819
  if (pDataBlock == NULL) {
    return NULL;
  }
820

H
Haojun Liao 已提交
821
  char *dataBlock = NULL;
H
Haojun Liao 已提交
822
  SQuery *pQuery = pRuntimeEnv->pQuery;
823
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
824

825
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
826
  if (functionId == TSDB_FUNC_ARITHM) {
827
    sas->pArithExpr = &pQuery->pSelectExpr[col];
828

829 830 831 832 833 834
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
835

836 837 838 839
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
840

H
Haojun Liao 已提交
841
    if (sas->data == NULL) {
H
Haojun Liao 已提交
842
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
843 844 845
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

846
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
847
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
848
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
849
      SColumnInfo *pColMsg = &pQuery->colList[i];
850

851 852 853 854 855 856 857 858
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
859

860
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
861
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
862
    }
863

864
  } else {  // other type of query function
865
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
866
    if (TSDB_COL_IS_TAG(pCol->flag)) {
867 868
      dataBlock = NULL;
    } else {
H
Haojun Liao 已提交
869 870 871 872 873
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
874 875
    }
  }
876

877 878 879 880
  return dataBlock;
}

/**
H
Haojun Liao 已提交
881
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
882 883
 * @param pRuntimeEnv
 * @param forwardStep
884
 * @param tsCols
885 886 887 888 889
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
890
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
891 892
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
893
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
894 895
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

896 897
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
898
  if (pDataBlock != NULL) {
899
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
900
    tsCols = (TSKEY *)(pColInfo->pData);
901
  }
902

903
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
904
  if (sasArray == NULL) {
H
Haojun Liao 已提交
905
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
906 907
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
908

909
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
910
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
911
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
912
  }
913

914
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
915
  if (QUERY_IS_INTERVAL_QUERY(pQuery) && tsCols != NULL) {
916
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
917
    TSKEY   ts = tsCols[offset];
918

919
    bool hasTimeWindow = false;
920
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
921
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
dengyihao's avatar
dengyihao 已提交
922
      tfree(sasArray);
H
hjxilinx 已提交
923
      return;
924
    }
925

H
Haojun Liao 已提交
926 927 928
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

929 930
    if (hasTimeWindow) {
      TSKEY   ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
931
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
932

933
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
H
Haojun Liao 已提交
934
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
935
    }
936

937 938
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
939

940
    while (1) {
H
Haojun Liao 已提交
941 942
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
943 944 945
      if (startPos < 0) {
        break;
      }
946

947
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
948
      hasTimeWindow = false;
949
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
950 951
        break;
      }
952

953 954 955 956 957
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
958
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
959

960
      SWindowStatus* pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
961
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
962
    }
963

964 965 966 967 968 969 970
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
971
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
972
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
973 974 975 976 977
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
978

979 980 981 982
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
983

984 985
    tfree(sasArray[i].data);
  }
986

987 988 989 990 991 992 993
  tfree(sasArray);
}

/*
 * Select the output buffer of the group that the given group-by column value
 * belongs to ("group by normal column" query).
 *
 * pData points at the group-by value of the current row, type/bytes describe
 * that value. The window result for the value is looked up (or created) via
 * doSetTimeWindowFromKey(); a result page is attached to it on first use and
 * the function contexts are pointed at it.
 *
 * Returns TSDB_CODE_SUCCESS on success, -1 when the value is NULL, when no
 * window result could be obtained, or when no result page could be added.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // ignore the null value
  if (isNull(pData, type)) {
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  // integer view of the group value; stays -1 for every type that is not listed below
  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->window.skey = groupKey;
  pGroupRes->window.ekey = groupKey;

  // not assign result buffer yet, add new result buffer
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);

  return TSDB_CODE_SUCCESS;
}

1030
/*
 * Locate the data of the (non-tag) group-by column inside the loaded block.
 *
 * Tag columns in the group-by expression are skipped. For a normal column,
 * *type and *bytes are set from its entry in pQuery->colList, and the pData of
 * the block column carrying the same column id is returned. NULL is returned
 * when no group-by column is found in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pGroupCol->flag == TSDB_COL_TAG) {
      continue;
    }

    // position of the group-by column in the list of queried columns
    int16_t slot = -1;
    int32_t c = 0;
    while (c < pQuery->numOfCols) {
      if (pQuery->colList[c].colId == pGroupCol->colId) {
        slot = c;
        break;
      }

      ++c;
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type  = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     *  the colIndex is acquired from the first meter of all qualified meters in this vnode during query prepare
     * stage, the remain meter may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding meter schema is reinforced.
     */
    int32_t blockCols = taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < blockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at the given offset with the current element of the
 * timestamp buffer (pRuntimeEnv->pTSBuf).
 *
 * Returns
 *   TS_JOIN_TAG_NOT_EQUALS  when the tag of the first function context differs from the element's tag,
 *   TS_JOIN_TS_NOT_EQUALS   when the row timestamp has not yet reached the element's timestamp in
 *                           scan direction (smaller for asc, larger for desc),
 *   TS_JOIN_TS_EQUAL        when both timestamps are equal.
 * A row timestamp that lies beyond the element's timestamp in scan direction
 * trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (pCtx[0].tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // the row is still "behind" the buffered timestamp with respect to the scan direction
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row must never run ahead of the timestamp buffer
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function with the given id has to be invoked for the
 * current scan.
 *
 * Rules, checked in this order:
 *  - TSDB_FUNC_TS always runs;
 *  - the tag/ts dummy functions and functions whose result is already
 *    complete never run;
 *  - first/first_dst run only for an ascending query;
 *  - last/last_dst run only when param[0] of the context equals the query order;
 *  - every other function runs unless the current scan is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (isDummy || GET_RES_INFO(pCtx)->complete) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, only the functions handled above need to be executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1137 1138
/*
 * Apply all output functions of the query to the current data block one row
 * at a time. This path is taken when rows have to be inspected individually
 * (column filters, timestamp-buffer join, group by normal column).
 *
 * For every row, in scan order:
 *  1. if a timestamp buffer is present, the row is matched against it
 *     (tag mismatch stops the scan of this block, timestamp mismatch skips the row);
 *  2. the column filters are evaluated and a non-qualifying row is skipped;
 *  3. for an interval query the row is applied to its active time window and to
 *     every following window that still covers the row's timestamp; otherwise the
 *     group output buffer is selected (group by normal column) and each function's
 *     xFunctionF is invoked for the row;
 *  4. if a timestamp buffer is present it is advanced; the query is marked
 *     QUERY_COMPLETED when the buffer is exhausted.
 *
 * On return the lastKey of the current table is one step past the last row visited.
 *
 * If the per-output arithmetic support array cannot be allocated, the query
 * result is finalized and the function leaves through
 * longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY).
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *item = pQuery->current;

  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
  bool groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

  // the first column only serves as timestamp list when it really is a timestamp column
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupbyColumnValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind the input of every output function
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[f];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // interval window query
      // decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);

      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // the row may also belong to the following (overlapping) windows
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool pastQueryRange = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey > pQuery->window.ekey)
                                                         : (nextWin.skey < pQuery->window.ekey);
        if (pastQueryRange) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyColumnValue) {
        char *val = groupbyColumnData + bytes * offset;

        // null data, too many state code
        if (setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes) != TSDB_CODE_SUCCESS) {
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  // at least one row must have been visited
  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    TSKEY blockEdge = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
    item->lastKey = blockEdge + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array (see getDataBlock)
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1302
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1303
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1304 1305 1306
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1307
  
H
Haojun Liao 已提交
1308
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1309
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1310
  } else {
1311
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1312
  }
1313

1314
  // update the lastkey of current table
1315
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1316
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1317

1318
  // interval query with limit applied
1319
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1320
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1321 1322 1323
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1324

1325 1326 1327 1328
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1329

1330 1331 1332
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1333

1334 1335 1336
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1337
    }
1338
  }
1339

1340
  return numOfRes;
1341 1342
}

H
Haojun Liao 已提交
1343
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1344 1345 1346 1347 1348 1349
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1350
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1351
  pCtx->aInputElemBuf = inputData;
1352

1353
  if (tpField != NULL) {
H
Haojun Liao 已提交
1354
    pCtx->preAggVals.isSet  = true;
1355 1356
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1357 1358 1359
  } else {
    pCtx->preAggVals.isSet = false;
  }
1360

H
Haojun Liao 已提交
1361 1362
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1363 1364 1365
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1366

1367 1368
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1369
    pCtx->ptsList = tsCol;
1370
  }
1371

1372 1373 1374 1375 1376
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1377
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1378
    /*
H
Haojun Liao 已提交
1379
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1380 1381 1382 1383 1384 1385 1386 1387 1388 1389
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1390

1391 1392
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1393 1394 1395 1396 1397 1398
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1412
  }
1413

1414 1415 1416 1417 1418 1419
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1420
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1421 1422 1423
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1424
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1425 1426 1427 1428 1429 1430
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1431 1432 1433
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1434
  if (isSelectivityWithTagsQuery(pQuery)) {
1435
    int32_t num = 0;
1436
    int16_t tagLen = 0;
1437 1438
    
    SQLFunctionCtx *p = NULL;
1439
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1440

1441
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1442
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1443
      
1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1457 1458 1459 1460 1461 1462 1463
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
      tfree(pTagCtx); 
    }
1464 1465 1466
  }
}

H
Haojun Liao 已提交
1467 1468
/*
 * Carve the shared buffer buf into consecutive slices, one per output
 * function, and register each slice with the matching result info. The slice
 * of output i is pQuery->pSelectExpr[i].interBytes bytes long.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char   *cursor = buf;
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t sliceLen = pQuery->pSelectExpr[i].interBytes;

    setResultInfoBuf(&pResultInfo[i], sliceLen, isStableQuery, cursor);
    cursor += sliceLen;

    ++i;
  }
}

1477
/*
 * Allocate and initialize the per-output function contexts and result infos
 * of the runtime environment.
 *
 * For each output expression the input/output type and length, the function
 * id, the function parameters and the output offset are set up. A single
 * buffer of pRuntimeEnv->interBufSize bytes is allocated and shared by all
 * result infos as intermediate result storage.
 *
 * @param pRuntimeEnv  runtime environment to set up; pRuntimeEnv->pQuery must be set
 * @param order        stored in param[2] of top/bottom/diff function contexts
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 *         fails; in that case resultInfo and pCtx are released again.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // input type/length come from the tag schema, the table name pseudo column or the column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // string arguments are referenced through pz; use the j-th argument, not always the first one
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // outputs are laid out back to back
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  char* buf = calloc(1, pRuntimeEnv->interBufSize);

  // a zero-sized request may legitimately yield NULL, anything else is an out-of-memory condition
  if (buf == NULL && pRuntimeEnv->interBufSize > 0) {
    // release the parameter values created above before the contexts themselves go away
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }

    goto _clean;
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything the runtime environment owns: the time-window result info,
 * the per-output function contexts (parameters, tag variant, tag ctx list), the
 * intermediate result buffer, the fill info, the result buffer, both tsdb query
 * handles and the ts buffer.
 *
 * A runtime env whose pQuery is NULL is treated as "never set up" and is left
 * untouched.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[col];

      // destroy every variant parameter attached to this output function
      int32_t p = 0;
      while (p < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[p]);
        ++p;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    // the intermediate buffer is released through the first result-info entry only
    tfree(pRuntimeEnv->resultInfo[0].interResultBuf);
    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1609
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1610

1611
/* Mark the query as cancelled; IS_QUERY_KILLED() tests for exactly this code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1612

H
Haojun Liao 已提交
1613 1614 1615
/*
 * Decide whether the query is a "fixed output" query.
 *
 * Interval queries are never fixed output. Top/bottom and group-by-normal-column
 * queries always are. Otherwise the query is fixed output as soon as one
 * select expression (ignoring timestamp helper columns) is not flagged
 * multi-output in aAggs[].
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note: top/bottom query is a fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // a leading single-parameter projection of the primary timestamp column is
    // the ts_comp helper and is ignored
    bool leadingTsProjection = (idx == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool timestampFunc = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || timestampFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1645
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1646
/* Returns true when any select expression uses TSDB_FUNC_INTERP. */
static bool isPointInterpoQuery(SQuery *pQuery) {
  bool hasInterp = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput && !hasInterp; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    hasInterp = (fid == TSDB_FUNC_INTERP);
  }

  return hasInterp;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1658
/*
 * Returns true when any select expression is one of the sum/avg rate functions
 * (SUM_RATE, SUM_IRATE, AVG_RATE, AVG_IRATE). TSDB_FUNC_TS entries are skipped.
 */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    bool sumRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE);
    bool avgRate = (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);

    if (sumRate || avgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1674
/* Returns true when any select expression uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (fid == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++idx;
  }

  return false;
}

H
hjxilinx 已提交
1685
/*
 * Decide whether a second scan in the opposite direction is required.
 *
 * - TS / TS_DUMMY / TAG expressions are ignored.
 * - A FIRST / FIRST_DST expression on a non-ascending query requires it.
 * - The first LAST / LAST_DST expression encountered decides the result: a
 *   reverse scan is needed when the order stored in its first argument differs
 *   from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      fid = pFunc->functionId;

    bool ignorable = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (ignorable) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t requestedOrder = pFunc->arg->argValue.i64;
      return requestedOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1704 1705 1706

/*
 * Returns true when every select expression is a tag-only expression:
 * TAGPRJ, TID_TAG, or COUNT applied to the table-name pseudo column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];
    int32_t    fid = pExpr->base.functionId;

    bool countOnTbname = (fid == TSDB_FUNC_COUNT) && (pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool tagOnlyExpr = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG) || countOnTbname;

    if (!tagOnlyExpr) {
      return false;
    }
  }

  return true;
}

1719 1720
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1721
/*
 * Compute the time window that contains `key`, aligned by the query's
 * sliding/interval settings, and store it in `win`.
 *
 * win->skey is produced by taosGetIntervalStartTimestamp(); win->ekey is
 * skey + intervalTime - 1, except when keyFirst is so close to INT64_MAX that
 * adding one interval would overflow, in which case ekey is clamped to INT64_MAX.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit,
                                            pQuery->precision);

  bool nearInt64Max = keyFirst > (INT64_MAX - pQuery->intervalTime);
  if (!nearInt64Max) {
    win->ekey = win->skey + pQuery->intervalTime - 1;
    return;
  }

  /*
   * keyFirst > INT64_MAX - intervalTime: the range keyFirst..keyLast must be
   * shorter than one interval, so there is nothing to adjust beyond clamping.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);
  win->ekey = INT64_MAX;
}

/*
 * Set pQuery->checkBuffer.
 *
 * Top/bottom and group-by-normal-column queries get 0. For all other queries
 * the select expressions are walked (timestamp helpers skipped) and the walk
 * stops at the first expression that is not multi-output; checkBuffer becomes 1
 * only when the last inspected expression was multi-output.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    bool timestampFunc = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (timestampFunc) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  if (multiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1764
/*
 * Sanity-check the list of columns to load: two adjacent entries of colList
 * must not carry the same column id. Returns false (after logging) on the
 * first adjacent duplicate, true otherwise.
 */
bool colIdCheck(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfCols - 1) {
    SColumnInfo *pCur = &pQuery->colList[idx];
    SColumnInfo *pNext = &pQuery->colList[idx + 1];

    if (pCur->colId == pNext->colId) {
      // NOTE(review): GET_QINFO_ADDR() subtracts offsetof(SQInfo, runtimeEnv); it is applied
      // to a SQuery pointer here, so the logged address is probably not the real QInfo - confirm.
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
      return false;
    }

    ++idx;
  }

  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Returns true when every select expression, apart from the TS / TS_DUMMY /
 * TAG / TAG_DUMMY helpers, is either `functId` or `functIdDst`.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool helperFunc = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                      (fid == TSDB_FUNC_TAG_DUMMY);
    if (helperFunc) {
      continue;
    }

    bool matches = (fid == functId) || (fid == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

/* True when the query consists solely of FIRST / FIRST_DST (plus helper columns). */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* True when the query consists solely of LAST / LAST_DST (plus helper columns). */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812
// todo refactor, add iterator
/* Swap skey and ekey of the time window of every table in every table group. */
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pGroup = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pGroup);

    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo* pInfo = (STableQueryInfo*) taosArrayGetP(pGroup, t);
      SWAP(pInfo->win.skey, pInfo->win.ekey, TSKEY);
    }
  }
}

H
Haojun Liao 已提交
1813 1814 1815
/*
 * Adjust the scan order (and, where needed, swap the query time range) for
 * query types whose result only depends on one end of the range:
 *
 *  - last_row query:               always scan descending; window.skey becomes
 *                                  the larger and window.ekey the smaller key.
 *  - point interpolation, no interval: always scan ascending.
 *  - no interval, only first():    scan ascending; per-table windows are swapped too.
 *  - no interval, only last():     scan descending; per-table windows are swapped too.
 *  - interval query on a super table with only first()/only last(): same order
 *    change, but only the query window is swapped.
 *
 * All other queries are left untouched.
 *
 * @param pQInfo       query info whose runtimeEnv.pQuery is adjusted in place
 * @param stableQuery  true when the query runs against a super table
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // pQInfo is logged directly: GET_QINFO_ADDR() expects the address of the runtime env,
  // so applying it to pQuery would print an unrelated address.
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    // descending scan: start from the larger key, end at the smaller one
    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->intervalTime == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Initial number of result pages to allocate:
 *  - group by normal column: 128
 *  - interval (time window) query: one page per table, but at least 16
 *  - everything else: 1
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t pages = 1;  // default: super table query, one page for each subset

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE);
  }

  assert(pages > 0);
  return pages;
}

H
Haojun Liao 已提交
1915
#define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) (((tbq) && (!sq))? (_q)->pSelectExpr[1].base.arg->argValue.i64:1)
1916

H
Haojun Liao 已提交
1917 1918
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t rowSize = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);
1919
  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / rowSize;
1920 1921 1922 1923
}

/*
 * Address of the output cell of column `columnIndex` for window result `pResult`
 * inside its result page.
 *
 * The page stores each column as a contiguous area of numOfRowsPerPage rows
 * starting at offset[columnIndex] * numOfRowsPerPage; within that area the cell
 * is found at (bytes of the column) * (row id scaled by the multi-output factor).
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery    *pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  int32_t scaledRowId =
      pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  char *columnStart = ((char *)pPage->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage;
  return columnStart + pQuery->pSelectExpr[columnIndex].bytes * scaledRowId;
}

H
Haojun Liao 已提交
1933
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
1934

H
Haojun Liao 已提交
1935 1936 1937 1938
/*
 * Use the per-column block statistics to decide whether the block has to be
 * loaded at all.
 *
 * Returns true (load) when
 *  - there are no statistics, or there is no filter column and the query is not top/bottom;
 *  - a filter column has no statistics entry, or is of binary/nchar type;
 *  - any filter of any filter column accepts the [min, max] range of that column.
 * Columns whose values are all NULL in this block are skipped. If no filter
 * matched and the query is a top/bottom query, the decision is delegated to
 * topbot_datablock_filter() for the first TOP/BOTTOM expression. Otherwise
 * the block can be discarded (false).
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool nothingToFilterOn = (pQuery->numOfFilterCols == 0) && (!pRuntimeEnv->topBotQuery);
  if (pDataStatis == NULL || nothingToFilterOn) {
    return true;
  }

  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[f];

    // locate the statistics entry of the filtered column
    int32_t statIdx = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pColFilter->info.colId) {
        statIdx = c;
        break;
      }
    }

    // no statistics data
    if (statIdx == -1) {
      return true;
    }

    // pre-filter is not supported on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pColFilter->info.type)) {
      return true;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pDataStatis[statIdx].numOfNull == numOfRows) {
      continue;
    }

    SDataStatis* pColStat = &pDataStatis[statIdx];

    // by default the filters see the raw min/max slots of the statistics entry
    char *pMin = (char *)&pColStat->min;
    char *pMax = (char *)&pColStat->max;

    // for FLOAT columns the slots are read as double and narrowed to float first
    float fMin = 0;
    float fMax = 0;
    if (pColFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      fMin = *(double *)(&pColStat->min);
      fMax = *(double *)(&pColStat->max);

      pMin = (char *)&fMin;
      pMax = (char *)&fMax;
    }

    for (int32_t i = 0; i < pColFilter->numOfFilters; ++i) {
      if (pColFilter->pFilters[i].fp(&pColFilter->pFilters[i], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t fid = pQuery->pSelectExpr[i].base.functionId;
      if (fid != TSDB_FUNC_TOP && fid != TSDB_FUNC_BOTTOM) {
        continue;
      }

      // NOTE(review): the statistics array is indexed with the output index here - confirm
      // that it lines up with the column the top/bottom expression works on.
      return topbot_datablock_filter(&pCtx[i], fid, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
2000
/*
 * Load as much of the current data block as the query actually needs.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when filter columns exist; otherwise
 * it is the union of what every output function reports through dataReqFunc(),
 * raised to BLK_DATA_ALL_NEEDED when a ts buffer is attached or the query is an
 * interval query.
 *
 *  - BLK_DATA_NO_NEEDED:     nothing is loaded, the block is counted as discarded.
 *  - BLK_DATA_STATIS_NEEDED: only the block statistics are fetched; the block
 *                            itself is loaded when no statistics exist.
 *  - otherwise:              statistics are fetched and used as a pre-filter
 *                            (needToLoadDataBlock); the block is either discarded
 *                            or fully loaded.
 *
 * @param pStatis     [out] block statistics, left untouched when not fetched
 * @param pDataBlock  [out] loaded block data, left untouched when not loaded
 * @return BLK_DATA_DISCARD when the pre-filter rejects the block,
 *         TSDB_CODE_SUCCESS in every other case
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t status = 0;
  if (pQuery->numOfFilterCols > 0) {
    status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      status |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
    }

    // pTSBuf is a pointer: test it against NULL rather than ordering it against 0
    if (pRuntimeEnv->pTSBuf != NULL || QUERY_IS_INTERVAL_QUERY(pQuery)) {
      status |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (status == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (status == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // The failure is deliberately not propagated (todo: return DISK_DATA_LOAD_FAILED);
      // a missing statistics pointer is handled by the fallback below.
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(status == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // The failure is deliberately not propagated: needToLoadDataBlock() answers
      // "load" when it is handed a NULL statistics pointer.
    }

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      return BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2062
/*
 * Binary search for `key` in the array of `num` TSKEY values at `pValue`
 * (presumably sorted ascending - the narrowing steps rely on it).
 *
 * order == TSDB_ORDER_DESC: returns the last position when key is not smaller
 *   than the last element, the position of an exact match, or (first - 1) of the
 *   current search range when key is smaller than its first element.
 * order == TSDB_ORDER_ASC: returns the first position when key is not larger
 *   than the first element, the position of an exact match, or the position
 *   right after the current search range when key is larger than its last
 *   element (-1 if that position is outside the array).
 *
 * Returns -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;
  int32_t span;

  if (order == TSDB_ORDER_DESC) {
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }

      span = hi - lo + 1;
      mid = lo + (span >> 1);

      if (key == keys[mid]) {
        break;
      }

      if (key < keys[mid]) {
        hi = mid - 1;
      } else {
        lo = mid + 1;
      }
    }
  } else {
    for (;;) {
      if (key <= keys[lo]) {
        return lo;
      }

      if (key == keys[hi]) {
        return hi;
      }

      if (key > keys[hi]) {
        int32_t next = hi + 1;
        return (next >= num) ? -1 : next;
      }

      span = hi - lo + 1;
      mid = lo + (span >> 1);

      if (key == keys[mid]) {
        break;
      }

      if (key < keys[mid]) {
        hi = mid - 1;
      } else {
        lo = mid + 1;
      }
    }
  }

  // only reached through an exact match on the middle element
  return mid;
}

2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146
/*
 * Resize every output column buffer to hold `capacity` rows and point each
 * function context at the start of its buffer. Nothing happens when `capacity`
 * is smaller than the current capacity.
 *
 * NOTE(review): a failed realloc() is only caught by assert(0); with assertions
 * compiled out the old buffer is kept while rec.capacity is still raised.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t colBytes = pQuery->pSelectExpr[col].bytes;
    assert(colBytes > 0 && capacity > 0);

    char *grown = realloc(pQuery->sdata[col], colBytes * capacity + sizeof(tFilePage));
    if (grown != NULL) {
      pQuery->sdata[col] = (tFilePage *)grown;
    } else {  // todo handle the oom
      assert(0);
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2153 2154 2155
/*
 * In case of prj/diff style queries (not an interval query, not group by normal
 * column, not a fixed output query), make sure the output buffers can take all
 * rows of the current block on top of the rows already produced.
 *
 * When the free room is smaller than the block, every output column buffer is
 * grown by the missing number of rows, the newly exposed area is zeroed, and
 * the function contexts are re-pointed behind the rows already produced.
 *
 * On allocation failure the function does not return: it leaves through
 * longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY), the same escape path
 * the block scan uses for a cancelled query. Buffers grown so far stay valid and
 * rec.capacity is left at its old value.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = pRec->capacity - pRec->rows;
  int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // size computed in size_t so that bytes * newSize cannot overflow a 32-bit int
    char *tmp = realloc(pQuery->sdata[i], (size_t)bytes * (size_t)newSize + sizeof(tFilePage));
    if (tmp == NULL) {
      // pQuery->sdata[i] is still the old, valid buffer; abort the query instead of
      // continuing with a buffer that is smaller than the capacity about to be recorded
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that have already been produced
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (newSize - pRec->rows) * bytes);
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // top/bottom/diff write their timestamps into the output of the first column
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2192 2193
/*
 * Scan every data block delivered by the query handle of the current scan
 * (master or secondary) and apply the query functions to it.
 *
 * For each block: honour a pending cancel (longjmp), initialise the first time
 * window of an interval query if that has not happened yet, make room in the
 * output buffers, load the block on demand (skipping blocks rejected by the
 * pre-filter), and run tableApplyFunctionsOnBlock(). The loop stops early when
 * the result buffer is full or the query is completed.
 *
 * After the loop the query is marked completed unless the result buffer is
 * full; for the master scan of an interval query all time windows are closed
 * once the query is completed.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrentTable = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrentTable->win.skey, pCurrentTable->win.ekey, pCurrentTable->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT handle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  } else {
    handle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // todo extract methods
    // first block of an interval query: establish the initial time window
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
      STimeWindow     firstWin = TSWINDOW_INITIALIZER;
      SWindowResInfo *pWinInfo = &pRuntimeEnv->windowResInfo;

      if (QUERY_IS_ASC_QUERY(pQuery)) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &firstWin);
        pWinInfo->startTime = firstWin.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &firstWin);
        pWinInfo->startTime = pQuery->window.skey;
      }

      pWinInfo->prevSKey = firstWin.skey;
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t loadCode = loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis, &pDataBlock);
    if (loadCode == BLK_DATA_DISCARD) {
      // block skipped: move the last key just beyond the block in scan direction
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }

      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2278
/*
 * Replace the content of the variant `tag` with the value of tag column
 * `tagColId` of table `pTable`.
 *
 * For the table-name pseudo column the table name is stored as BINARY. For a
 * regular tag the value is fetched with tsdbGetTableTagVal(); a missing or NULL
 * value turns the variant into TSDB_DATA_TYPE_NULL. BINARY/NCHAR values are
 * copied from their var-data payload, every other type from `bytes` raw bytes.
 *
 * The previous content of `tag` is destroyed first. `tsdb` is not used.
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char* tableName = tsdbGetTableName(pTable);
    assert(tableName != NULL);

    tVariantCreateFromBinary(tag, varDataVal(tableName), varDataLen(tableName), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char* tagVal = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
  if (tagVal == NULL || isNull(tagVal, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  bool varLenType = (type == TSDB_DATA_TYPE_BINARY) || (type == TSDB_DATA_TYPE_NCHAR);
  if (varLenType) {
    tVariantCreateFromBinary(tag, varDataVal(tagVal), varDataLen(tagVal), type);
  } else {
    tVariantCreateFromBinary(tag, tagVal, bytes, type);
  }
}

2311
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
2312
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2313
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
2314

H
[td-90]  
Haojun Liao 已提交
2315 2316 2317
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);
H
Haojun Liao 已提交
2318 2319 2320 2321 2322 2323 2324 2325 2326 2327

    // todo refactor extract function.
    int16_t type = -1, bytes = -1;
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
        type = pQuery->tagColList[i].type;
        bytes = pQuery->tagColList[i].bytes;
      }
    }

2328
    doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2329 2330
  } else {
    // set tag value, by which the results are aggregated.
2331
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
H
Haojun Liao 已提交
2332
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];
H
[td-90]  
Haojun Liao 已提交
2333
  
2334
      // ts_comp column required the tag value for join filter
H
Haojun Liao 已提交
2335
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
2336 2337
        continue;
      }
2338

2339
      // todo use tag column index to optimize performance
2340
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
H
Haojun Liao 已提交
2341
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
2342
    }
2343

2344
    // set the join tag for first column
H
[td-90]  
Haojun Liao 已提交
2345
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
H
Haojun Liao 已提交
2346
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
2347 2348
        pRuntimeEnv->pTSBuf != NULL) {
      assert(pFuncMsg->numOfParams == 1);
H
Haojun Liao 已提交
2349 2350 2351 2352 2353 2354 2355 2356 2357 2358

      // todo refactor
      int16_t type = -1, bytes = -1;
      for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
        if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
          type = pQuery->tagColList[i].type;
          bytes = pQuery->tagColList[i].bytes;
        }
      }

2359
      doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2360
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
B
Bomin Zhang 已提交
2361
          pRuntimeEnv->pCtx[0].tag.i64Key)
2362 2363 2364 2365 2366 2367 2368
    }
  }
}

/*
 * Feed one window result (pWindowRes) into the merge functions of all output columns.
 *
 * mergeFlag == false starts a new output row: each context's output buffer is
 * advanced by one row, its result info is reset and the aggregate is re-initialised.
 * mergeFlag == true merges into the current output row.
 *
 * The first pass prepares every context (input buffer, timestamp, tag variant);
 * the second pass invokes distMergeFunc for every column except TSDB_FUNC_TAG_DUMMY.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      // move on to the next output row and start a fresh aggregation on it
      pc->aOutputBuf += pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pc->resultInfo);
      aAggs[funcId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes);

    // in case of tag column, the tag information should be extracted from input buffer
    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    tVariantDestroy(&pc->tag);

    int32_t outType = pc->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtx[col]);
    }
  }
}

2408
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2477
/*
 * Debug helper: dump numOfRows rows of intermediate results to stdout, one row per line.
 *
 * pdata holds one page per output column; the value of row r in column c is read at
 * pdata[c]->data + pSelectExpr[c].bytes * r. Only binary, timestamp, bigint, int,
 * float and double output types are printed; other types are skipped silently.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          // binary cells are decoded according to the function that produced them
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, pdata[col]->data + pExpr->bytes * row, type);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)(pdata[col]->data + pExpr->bytes * row));
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2516 2517 2518
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2519 2520 2521 2522 2523
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2524

2525 2526
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2527

2528 2529
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2530

2531 2532 2533 2534
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2535

2536 2537 2538 2539
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2540

H
hjxilinx 已提交
2541
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2542
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2543

2544 2545
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2546

H
hjxilinx 已提交
2547
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2548
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2549

2550 2551
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2552

2553 2554 2555
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2556

2557 2558 2559
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2560
/*
 * Merge the results of table groups, starting at pQInfo->groupIndex, until one group
 * produces at least one result page or all groups have been processed.
 * pQInfo->groupIndex is advanced past every group that was merged.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t st = taosGetTimestampMs();
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, taosGetTimestampMs() - st);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results from the disk-based result buffer
 * into the query output buffers (pQuery->sdata) and add the row count to
 * pQuery->rec.rows.
 *
 * When every page list of the current group has been consumed
 * (pQInfo->offset == pQInfo->numOfGroupResultPages) the next group is merged first.
 * If that fails the function returns without copying; if no group is left,
 * pQInfo->tableIndex is set to the total number of tables to mark the query completed.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
    pQInfo->numOfGroupResultPages = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // groupIndex already points past the group whose results are being returned
  int32_t groupResId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pageIds = getDataBufPagesIdList(pResultBuf, pQInfo->offset + groupResId);

  // total number of rows held by the pages of this list
  int32_t numOfRows = 0;
  for (int32_t p = 0; p < pageIds.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageIds.pData[p]);
    numOfRows += pPage->num;
  }

  // append the pages one after another to each output column
  int32_t copied = 0;
  for (int32_t p = 0; p < pageIds.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageIds.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t colBytes = pRuntimeEnv->pCtx[col].outputBytes;
      char *  dst = pQuery->sdata[col]->data + copied * colBytes;
      char *  src = pPage->data + pRuntimeEnv->offset[col] * pPage->num;

      memcpy(dst, src, colBytes * pPage->num);
    }

    copied += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += numOfRows;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2642
/*
 * Return the number of results held by a window result.
 *
 * The count is taken from the first output column that is not a TS / TAG / TAGPRJ
 * function and has numOfRes > 0; those three functions follow the main output and
 * cannot decide the row count themselves. Returns 0 when no column qualifies.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool followsMainOutput =
        (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);

    if (!followsMainOutput) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }
  }

  return 0;
}

2665
/*
 * Merge the window results of all tables in one group into the disk-based result buffer.
 *
 * The tables of pGroup that own at least one result page and one window result are
 * merged in timestamp order through a loser tree. Window results with the same
 * timestamp are merged into one output row; a full output buffer is flushed with
 * flushFromResultBuf().
 *
 * Returns
 *   0   no table in the group has any result,
 *   -1  flushing to the result buffer failed,
 *   otherwise pQInfo->numOfGroupResultPages after the merge.
 * On memory allocation failure the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * All temporary allocations are released on every exit path.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (list.size > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);

    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  char *       buf = calloc(1, pRuntimeEnv->interBufSize);

  // pTree is dereferenced right below, so a NULL tree is treated like any other failed allocation
  if (pTree == NULL || pResultInfo == NULL || buf == NULL) {
    tfree(pTree);
    tfree(pResultInfo);
    tfree(buf);
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  int32_t ret = -1;  // stays -1 unless the whole merge succeeds
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);

    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // advance the cursor of this source, whether or not its window held results
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto cleanup;
    }
  }

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex,
         taosGetTimestampMs() - startt);

  pQInfo->offset = 0;
  ret = pQInfo->numOfGroupResultPages;

cleanup:
  tfree(pTableList);
  tfree(posList);
  tfree(pTree);
  tfree(pResultInfo);
  tfree(buf);

  return ret;
}

/*
 * Move the rows currently held in the query output buffers (pQuery->sdata) into
 * newly obtained pages of the disk-based result buffer.
 *
 * The rows are split into chunks of at most `capacity` rows; every chunk goes into
 * its own page, stored column by column at pRuntimeEnv->offset[i] * rowsInPage.
 * All pages of one call are requested under the same id, and
 * pQInfo->numOfGroupResultPages is increased by one per call.
 *
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *             pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // number of rows that fit into one page
  int32_t capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;

  // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000.
  int32_t pageId = -1;

  int32_t rowsDone = 0;
  for (int32_t rowsLeft = pQuery->sdata[0]->num; rowsLeft > 0;) {
    int32_t chunk = (rowsLeft > capacity) ? capacity : rowsLeft;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *pPage = getNewDataBuf(pResultBuf, id, &pageId);

    // pagewise copy to dest buffer
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t colBytes = pRuntimeEnv->pCtx[col].outputBytes;
      pPage->num = chunk;

      char *dst = pPage->data + pRuntimeEnv->offset[col] * pPage->num;
      char *src = ((char *)pQuery->sdata[col]->data) + rowsDone * colBytes;
      memcpy(dst, src, pPage->num * colBytes);
    }

    rowsDone += chunk;
    rowsLeft -= chunk;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output for every output column: the context output pointer is
 * placed one row BEFORE the start of the column buffer, the context is bound to
 * pResultInfo[k], and the row counter of the column buffer is cleared.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];
    tFilePage *     pOutput = pQuery->sdata[col];

    // starts one row early; doMerge() in this file advances the pointer by one row
    // before writing a new output row
    pc->aOutputBuf = pOutput->data - pc->outputBytes;
    pc->size = 1;
    pc->startOffset = 0;
    pc->resultInfo = &pResultInfo[col];

    pOutput->num = 0;
  }
}

2858 2859 2860 2861 2862 2863 2864
/*
 * Prepare the per-table query state for a scan in the opposite direction.
 *
 * The end of the table's time window is set to lastKey + step, the window
 * boundaries are swapped, lastKey restarts from the new start key, the cursor order
 * is switched and the window-result cursor is moved to the last window.
 * A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    // order has change already!
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

    // TODO validate the relation between win.ekey and lastKey + step before overwriting it

    STimeWindow *pWin = &pTableQueryInfo->win;
    pWin->ekey = pTableQueryInfo->lastKey + step;

    SWAP(pWin->skey, pWin->ekey, TSKEY);
    pTableQueryInfo->lastKey = pWin->skey;

    SWITCH_ORDER(pTableQueryInfo->cur.order);
    pTableQueryInfo->cur.vgroupIndex = -1;

    // set the index at the end of time window
    pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
  }
}

/*
 * For every closed window in pWindowResInfo, decide which output columns take part
 * in the reverse scan by setting resultInfo[j].complete.
 *
 * FIRST / FIRST_DST under ascending order and LAST / LAST_DST under descending order
 * are marked not complete; every other function except TS and TAG is marked
 * complete. TS and TAG columns are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

        bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
        bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pRes->resultInfo[col].complete = false;
        } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
          pRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

2910 2911
/*
 * Switch the query state over to the reverse scan.
 *
 * Step 1 sets the `complete` flag of the result info of each output column: per
 * closed window for group-by-normal-column / interval queries, otherwise on the
 * result info attached to each function context (contexts without a result info
 * are skipped).
 * Step 2 updates the scan window of every table in every table group.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, pWindowResInfo, order);
  } else {  // for simple result of table query,
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      SResultInfo *pInfo = pRuntimeEnv->pCtx[col].resultInfo;
      if (pInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pInfo->complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pInfo->complete = true;
      }
    }
  }

  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t k = 0; k < numOfTables; ++k) {
      updateTableQueryInfoForReverseScan(pQuery, taosArrayGetP(pGroup, k));
    }
  }
}

2950
/* Apply SWITCH_ORDER to the `order` field of every output column's function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SWITCH_ORDER(pCtx[col].order);
  }
}

B
Bomin Zhang 已提交
2957
/*
 * Allocate the per-column result info array and the zeroed intermediate buffer
 * (interBufSize bytes) of one result row, and record its position (*posInfo).
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when either allocation
 * fails. If the second allocation fails, pResultRow->resultInfo stays allocated and
 * pResultRow->pos has already been set.
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo, size_t interBufSize) {
  pResultRow->resultInfo = calloc((size_t)pQuery->numOfOutput, sizeof(SResultInfo));
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = *posInfo;

  char *interBuf = calloc(1, interBufSize);
  if (interBuf != NULL) {
    // set the intermediate result output buffer
    setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Point every function context back at the start of its output column, reset and
 * re-attach the runtime result info, zero the output column memory, and finally
 * re-initialise the contexts through initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo *   pInfo = &pRuntimeEnv->resultInfo[col];

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    resetResultInfo(pInfo);
    pCtx->resultInfo = pInfo;

    // set the timestamp output buffer for top/bottom/diff query
    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of the function contexts by `output` rows and reset
 * their result info.
 *
 * The value buffer moves only for functions whose status satisfies IS_OUTER_FORWARD;
 * the timestamp buffer moves for top/bottom. TSDB_FUNC_DIFF must not appear here
 * (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    assert(funcId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[funcId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    bool isTopBottom = (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and run the aggregate's init
 * routine on each context whose result info is not initialised yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    pCtx->currentStage = 0;

    SResultInfo *pInfo = GET_RES_INFO(pCtx);
    if (!pInfo->initialized) {
      aAggs[funcId].init(pCtx);
    }
  }
}

3046
/*
 * Apply the remaining LIMIT offset to the rows currently held in the output buffers.
 *
 * - Nothing happens when there are no rows or the offset is already 0.
 * - If the offset covers all current rows, the rows are dropped, the offset is
 *   reduced accordingly, the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 * - Otherwise the first `offset` rows are removed by shifting the remaining rows to
 *   the front of each column, the offset becomes 0 and the output pointers are
 *   placed right after the remaining rows.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows > pQuery->limit.offset) {
    // only part of the current rows is skipped
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, pQuery->rec.rows);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
      int32_t colBytes = pCtx->outputBytes;
      char *  base = (char *)pQuery->sdata[col]->data;

      // source and destination overlap, hence memmove
      memmove(pQuery->sdata[col]->data, base + colBytes * numOfSkip, pQuery->rec.rows * colBytes);
      pCtx->aOutputBuf = base + pQuery->rec.rows * colBytes;

      if (funcId == TSDB_FUNC_DIFF || funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
    return;
  }

  // every current row falls inside the offset: drop them all
  qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
      pQuery->limit.offset - pQuery->rec.rows);

  pQuery->limit.offset -= pQuery->rec.rows;
  pQuery->rec.rows = 0;

  resetCtxOutputBuf(pRuntimeEnv);

  // clear the buffer full flag if exists
  CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
}

/*
 * Update the query status flags.
 *
 * QUERY_NOT_COMPLETED replaces the whole status; any other status is OR-ed in after
 * the QUERY_NOT_COMPLETED flag has been cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Invoke xNextStep on every output function except TSDB_FUNC_TS and report whether
 * any function result is still incomplete, in which case the data blocks have to be
 * scanned again.
 *
 * For group-by-normal-column and interval queries this is done once per closed window
 * result, after that window's buffers have been bound to the function contexts;
 * otherwise it is done once on the contexts as they currently are.
 *
 * @return true if at least one result info is not marked complete
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    again = false;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pSelectExpr[col].base.functionId;
      if (functId != TSDB_FUNC_TS) {
        aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

        SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
        if (!pResInfo->complete) {
          again = true;
        }
      }
    }

    return again;
  }

  // one pass over the output functions for every closed window result
  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;
  for (int32_t idx = 0; idx < pInfo->size; ++idx) {
    SWindowResult *pWindowRes = getWindowResult(pInfo, idx);
    if (pWindowRes->status.closed) {
      setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int16_t functId = pQuery->pSelectExpr[col].base.functionId;
        if (functId != TSDB_FUNC_TS) {
          aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

          SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
          if (!pResInfo->complete) {
            again = true;
          }
        }
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3142
/*
 * Build a snapshot of the current scan state.
 *
 * The snapshot records the query status, the current window result index, the
 * query time window, and uses `start` both as the last key and as the start of
 * the current window (whose end is the end key of the current table's window).
 * All fields not listed here are zero-initialized.
 *
 * `start` must not be beyond the current table's lastKey with respect to the
 * scan order (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  SQueryStatusInfo snapshot = {0};

  snapshot.status         = pQuery->status;
  snapshot.windowIndex    = pRuntimeEnv->windowResInfo.curIndex;
  snapshot.lastKey        = start;
  snapshot.w              = pQuery->window;
  snapshot.curWindow.skey = start;
  snapshot.curWindow.ekey = pCurrent->win.ekey;

  return snapshot;
}

3160 3161 3162 3163
/*
 * Put the runtime environment into reverse-scan mode.
 *
 * Saves the ts buffer cursor into pStatus, flips the scan order of the ts buffer
 * (if any) and of the query, sets the query window to pStatus->curWindow with
 * start and end keys exchanged, opens a fresh secondary tsdb query handle for that
 * window, and finally resets the query status and adjusts the function contexts.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // keep the cursor so that it can be restored after the reverse scan
  // NOTE(review): tsBufGetCursor is called before the NULL check below, presumably it tolerates a NULL buffer
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // scan the current window in the opposite direction
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {0};
  cond.twindow   = pQuery->window;
  cond.order     = pQuery->order.order;
  cond.colList   = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  // release the handle of a previous secondary scan, if there is one
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3197 3198
/*
 * Undo the changes made for the reverse scan.
 *
 * Flips the scan order of the query and the function contexts back, restores the
 * ts buffer cursor saved in pStatus, switches back to the master scan flag and
 * restores the last key, the status and the time windows recorded in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  // bring the ts buffer back to the saved position, scanning in the restored order
  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the progress and the status recorded before the reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // both the table window and the query window go back to the saved query window
  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

3220
/*
 * Scan the data blocks of the current table, starting from `start`.
 *
 * The blocks are scanned once as the master scan, then scanned again (REPEAT_SCAN)
 * for as long as needScanDataBlocksAgain() reports an incomplete function result.
 * Each repeat opens a new secondary tsdb query handle over the window covered by
 * the master scan. If the query is killed while a repeat is being set up, the
 * results are finalized and control leaves through longjmp with
 * TSDB_CODE_TSC_QUERY_CANCELLED. Afterwards a reverse scan is executed when
 * needReverseScan() asks for one.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // remember where the scan starts
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // only the master scan updates the saved progress; repeated scans reuse it
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;
      saved.curWindow.ekey = pCurrent->lastKey - direction;
      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // put back the status code of the master scan before leaving the loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond cond = {0};
    cond.twindow   = saved.curWindow;
    cond.order     = pQuery->order.order;
    cond.colList   = pQuery->colList;
    cond.numOfCols = pQuery->numOfCols;

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // stop right here if the query has been killed
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv);  // clean up the resources allocated during the query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from the current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3291
/*
 * Call xFinalize of every output function.
 *
 * For group-by-normal-column and interval queries the functions are finalized once
 * per closed window result (all windows are closed first for group-by queries), and
 * the number of rows of each window is refreshed from getNumOfResult(). For all
 * other queries the functions are finalized once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pInfo);
  }

  for (int32_t idx = 0; idx < pInfo->size; ++idx) {
    if (!isWindowResClosed(pInfo, idx)) {
      continue;
    }

    SWindowResult *pWindowRes = &pInfo->pResult[idx];
    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * record the number of output rows of this window; for group by normal columns it is
     * usually 1, except for the top and bottom query
     */
    pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Check whether the select list contains at least one function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t id = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (id == TSDB_FUNC_TS || id == TSDB_FUNC_TAG || id == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3339
/*
 * Initialize a STableQueryInfo inside the caller-provided buffer `buf`.
 *
 * The table handle and the time window are recorded, lastKey starts at win.skey and
 * the cursor's vgroupIndex is set to -1. For interval and group-by-normal-column
 * queries the per-table window result info is initialized as well; other queries
 * leave it untouched.
 *
 * @return the initialized object (i.e. `buf`), or NULL if initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // only interval/groupby queries keep window results for each table
  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol)) {
    return pInfo;
  }

  const int32_t initialSize = 16;
  const int32_t initialThreshold = 100;

  int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  return (code == TSDB_CODE_SUCCESS) ? pInfo : NULL;
}

3364
/*
 * Release the window result info held by a table query info object.
 * A NULL pTableQueryInfo is ignored. The object itself is not freed here.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
  }
}

H
Haojun Liao 已提交
3372 3373 3374 3375
/*
 * Assert that lastKey of the table has not moved behind the start key of its window:
 * lastKey >= win.skey for an ascending query, lastKey <= win.skey otherwise.
 */
#define CHECK_QUERY_TIME_RANGE(_q, _tableInfo)                                          \
  do {                                                                                  \
    assert(QUERY_IS_ASC_QUERY(_q) ? ((_tableInfo)->lastKey >= (_tableInfo)->win.skey)   \
                                  : ((_tableInfo)->lastKey <= (_tableInfo)->win.skey)); \
  } while (0)
3377 3378 3379 3380

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3381
 * @param pDataBlockInfo
3382
 */
H
Haojun Liao 已提交
3383
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3384
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3385 3386 3387
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3388 3389
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3390 3391 3392 3393

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3394

H
Haojun Liao 已提交
3395 3396 3397
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3398

H
Haojun Liao 已提交
3399
  int32_t GROUPRESULTID = 1;
3400 3401
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3402 3403 3404
  if (pWindowRes == NULL) {
    return;
  }
3405

3406 3407 3408 3409 3410 3411 3412 3413 3414 3415
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3416

H
Haojun Liao 已提交
3417 3418
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3419 3420 3421 3422
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3423
/*
 * Point every function context at the buffers of the given window result.
 *
 * For each output column the output buffer and the result info of the context are
 * taken from pResult. TOP, BOTTOM and DIFF additionally use the output buffer of the
 * first context as their timestamp output buffer. The super table query flag of the
 * runtime environment is copied into each result info.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    int32_t id = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsBuf = (id == TSDB_FUNC_TOP || id == TSDB_FUNC_BOTTOM || id == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // attach the result info (and intermediate buffer) that belongs to this column
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3448 3449
/*
 * Same binding as setWindowResOutputBuf(), plus lazy initialization of the contexts.
 *
 * The result info of every context is always switched to the one stored in pResult.
 * Columns whose result info is already initialized and complete are left alone after
 * that. For the remaining columns the output buffer is bound, currentStage is reset
 * to 0, the timestamp output buffer is set for TOP/BOTTOM/DIFF, the super table query
 * flag is copied, and the function's init() is invoked if the result info has not been
 * initialized yet.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (!finished) {
      pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);
      pCtx->currentStage = 0;

      int32_t id = pCtx->functionId;
      if (id == TSDB_FUNC_TOP || id == TSDB_FUNC_BOTTOM || id == TSDB_FUNC_DIFF) {
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }

      // set super table query flag
      pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

      if (!pCtx->resultInfo->initialized) {
        aAggs[id].init(pCtx);
      }
    }
  }
}

3480
/*
 * Set the tag values for the given table and position the ts buffer (if any).
 *
 * When a ts buffer exists: on the first visit of the table (cursor vgroupIndex is -1)
 * the tag of the first context is stored in the table info, the start position of the
 * elements with that tag is located in the ts buffer, and the resulting cursor is kept
 * in the table info; on later visits the kept cursor is restored into the ts buffer.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan need the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of the current table
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3511
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3512 3513
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3514 3515
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3516 3517 3518
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3519
    pTableQueryInfo->win.skey = key;
3520
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3521

3522 3523 3524 3525 3526
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3527

3528 3529 3530 3531 3532 3533
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3534
    STimeWindow     w = TSWINDOW_INITIALIZER;
3535
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3536

H
Haojun Liao 已提交
3537 3538
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3539
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3540
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3541

3542 3543
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3544
        assert(win.ekey == pQuery->window.ekey);
3545
      }
3546 3547
      
      pWindowResInfo->prevSKey = w.skey;
3548
    }
3549

3550
    pTableQueryInfo->queryRangeSet = 1;
3551
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3552 3553 3554 3555
  }
}

/*
 * Check whether any function in the select list is flagged with TSDB_FUNCSTATE_NEED_TS.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t id = pQuery->pSelectExpr[col].base.functionId;
    if (aAggs[id].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    ++col;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block must be loaded.
 *
 * It is required when
 * 1. the lastKey of the current table or the end key of the query window is located
 *    inside the time window of the block, so that the precise position can be decided, or
 * 2. at least one function of the query needs the timestamp, see requireTimestamp().
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3579
/*
 * Copy rows of the closed window results into the output buffers of the query.
 *
 * Copying resumes at the window identified by pQInfo->groupIndex (counted from the
 * front for TSDB_ORDER_ASC, from the back otherwise) and at row pQInfo->offset inside
 * that window. It stops when the windows are exhausted or when pQuery->rec.capacity
 * rows have been copied. pQInfo->groupIndex and pQInfo->offset are advanced so that
 * the next call continues where this one stopped.
 *
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  bool    ascending = (orderType == TSDB_ORDER_ASC);
  int32_t step = ascending ? 1 : -1;
  int32_t first = ascending ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  int32_t copied = 0;
  for (int32_t idx = first; (idx >= 0) && (idx < numOfClosed); idx += step) {
    SWindowResult *pWindowRes = &pWindows[idx];

    // nothing to copy from an empty window, move on to the next one
    if (pWindowRes->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t numOfRowsToCopy = pWindowRes->numOfRows - pQInfo->offset;
    int32_t rowsToSkip = pQInfo->offset;

    if (numOfRowsToCopy > pQuery->rec.capacity - copied) {
      // the remaining output space cannot hold the whole window: copy a part and remember the position
      numOfRowsToCopy = pQuery->rec.capacity - copied;
      pQInfo->offset += numOfRowsToCopy;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pWindowRes);
      char *dst = pQuery->sdata[col]->data + copied * width;
      memcpy(dst, src + rowsToSkip * width, width * numOfRowsToCopy);
    }

    copied += numOfRowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
3654
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
3655
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
3656

3657
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
3658
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
3659

3660
  pQuery->rec.rows += numOfResult;
3661

3662
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
3663 3664
}

H
Haojun Liao 已提交
3665
/*
 * Raise numOfRows of every window result to the largest numOfRes reported by its
 * functions, ignoring TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 * Nothing is done for interval queries.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t idx = 0; idx < pRuntimeEnv->windowResInfo.size; ++idx) {
    SWindowResult *pWindowRes = &pRuntimeEnv->windowResInfo.pResult[idx];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t id = pRuntimeEnv->pCtx[col].functionId;

      bool auxiliary = (id == TSDB_FUNC_TS || id == TSDB_FUNC_TAG || id == TSDB_FUNC_TAGPRJ);
      if (!auxiliary) {
        pWindowRes->numOfRows = MAX(pWindowRes->numOfRows, pWindowRes->resultInfo[col].numOfRes);
      }
    }
  }
}

H
Haojun Liao 已提交
3687
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
3688
    SArray *pDataBlock, __block_search_fn_t searchFn) {
3689
  SQuery *         pQuery = pRuntimeEnv->pQuery;
3690 3691
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  
3692
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3693
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3694

H
Haojun Liao 已提交
3695
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
3696
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3697
  } else {
3698
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3699 3700 3701
  }
}

3702 3703 3704
/*
 * Check whether there are results that have not been returned to the client yet.
 *
 * - false once the limit (if any) has been reached by pQuery->rec.total;
 * - for fill queries (other than point interpolation): true if the fill info still
 *   holds rows, or if the query is completed and filling up to the end of the query
 *   window would still produce rows;
 * - for other queries: true if the query is completed, it is a group-by-normal-column
 *   or interval query, and there are window results left.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  if (pQuery->fillType == TSDB_FILL_NONE || isPointInterpoQuery(pQuery)) {
    // no filling involved: window results may be waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // rows that are not returned yet must be filled and returned in the first place
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * No rows remain at this point. If the query is not completed yet, the gap between two
   * result blocks is handled after the next data block is retrieved from TSDB, so only a
   * completed query is asked for the rows to be filled up to the end of the query window.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response buffer `data`.
 *
 * Layout written to `data`:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer in network
 *      byte order;
 *   3. one STableIdInfo per entry, with uid/key converted by htobe64 and tid by htonl.
 *
 * The position following the column data has no alignment guarantee, so the count and
 * the table id records are stored with memcpy instead of through casted pointers
 * (a store through a misaligned int32_t* / STableIdInfo* is undefined behaviour).
 *
 * After the copy, a completed query is marked QUERY_OVER when, for a super table
 * query, all tables have been visited, or otherwise when no results remain.
 *
 * @param pQInfo     query info object
 * @param numOfRows  number of rows to copy from every output column
 * @param data       destination buffer; the caller must provide enough space
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);

  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for (int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    // read-modify-write through a local copy: only uid/tid/key change, every other
    // byte of the destination record stays exactly as it was
    STableIdInfo dst;
    memcpy(&dst, data, sizeof(dst));

    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
3780
/*
 * Generate filled result rows and apply the remaining query offset to them.
 *
 * Rows are generated into pQuery->sdata, at most pQuery->rec.capacity per round.
 * - offset == 0: the generated rows are returned as they are;
 * - offset smaller than the number of generated rows: the first `offset` rows are
 *   dropped by moving the rest to the beginning of the pDst buffers, and the offset
 *   is cleared;
 * - otherwise: the whole batch is consumed by the offset, and another round follows
 *   as long as queryHasRemainResults() reports more results.
 *
 * NOTE(review): rows are generated into pQuery->sdata while the shift is applied to
 * pDst, so callers are presumably expected to pass pQuery->sdata as pDst; numOfFilled
 * is not used.
 *
 * @return number of rows available in the output buffers
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* the start position according to the offset value is reached, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // every generated row is skipped due to the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return 0;
    }
  }
}

3825
/*
 * Write the cost summary of a query to the debug log: elapsed time, io time, number
 * of blocks, number of loaded block statistics and data blocks, total rows and checked
 * rows, all taken from the summary kept in the runtime environment.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pSummary = &pQInfo->runtimeEnv.summary;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, io time:%"PRId64" us, total blocks:%d, load block statis:%d,"
         " load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->ioTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);
}

3869 3870
/*
 * Consume what is left of the LIMIT offset inside one data block.
 *
 * If the offset equals the number of rows in the block, the block is dropped
 * entirely: lastKey of the current table is moved one step past the block
 * boundary and nothing is loaded. Otherwise the start position inside the
 * block is computed from the offset, lastKey is set to the timestamp at that
 * position, and the query functions are applied on the loaded block.
 *
 * In both cases pQuery->limit.offset is reset to 0.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param pBlockInfo   description of the block the query handle currently points to
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  const bool       ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the whole block is swallowed by the offset, no need to load it
  if (pQuery->limit.offset == pBlockInfo->rows) {
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    TSKEY   boundary = ascending ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pCurrent->lastKey = boundary + step;
    pQuery->limit.offset = 0;
    return;
  }

  // first row to be returned: counted from the head (asc) or from the tail (desc) of the block
  pQuery->pos = ascending ? pQuery->limit.offset : (pBlockInfo->rows - pQuery->limit.offset - 1);
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *         pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pFirstCol = taosArrayGet(pBlock, 0);

  // column 0 is read as the array of timestamps of this block
  TSKEY *tsList = (TSKEY *)pFirstCol->pData;

  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3904

3905 3906 3907 3908 3909
/*
 * Skip whole data blocks to satisfy the LIMIT offset of the query.
 *
 * Nothing is done when there is no positive offset or when filter columns
 * exist. Otherwise blocks are iterated: while the remaining offset is larger
 * than the number of rows of a block, the block is skipped and lastKey of the
 * current table is moved past it; the first block that cannot be skipped
 * entirely is handed to updateOffsetVal() and the iteration stops.
 *
 * If the query has been killed, the query result is finalized and control
 * leaves via longjmp with TSDB_CODE_TSC_QUERY_CANCELLED.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv);  // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the start position is located inside this block
    if (!(pQuery->limit.offset > blockInfo.rows)) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the complete block is covered by the offset
    pQuery->limit.offset -= blockInfo.rows;
    pCurrent->lastKey = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey += step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }
}
3940

H
Haojun Liao 已提交
3941
/*
 * Apply the LIMIT offset of an interval query by skipping time windows.
 *
 * *start is first set to lastKey of the current table. When the offset is not
 * positive, or filter columns, a ts buffer or fill info are present, nothing
 * else happens. Otherwise data blocks are iterated and the offset is
 * decreased by one for every time window that is counted against the current
 * block. Once the offset reaches 0, the start of the query (*start,
 * pQuery->window.skey, windowResInfo.prevSKey) is moved to the first window to
 * be returned and the function returns.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param start        [out] timestamp the query should start from
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL ||
      pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * 1. for interval without interpolation query we forward pQuery->intervalTime at a time for
   *    pQuery->limit.offset times. Since hole exists, pQuery->intervalTime*pQuery->limit.offset value is
   *    not valid. otherwise, we only forward pQuery->limit.offset number of points
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo * pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableInfo = pQuery->current;
  STimeWindow      aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending order: the aligned start window is only computed once
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow curWin = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      bool skipCurrent = QUERY_IS_ASC_QUERY(pQuery) ? (curWin.ekey <= blockInfo.window.ekey)
                                                    : (curWin.ekey >= blockInfo.window.skey);
      if (skipCurrent) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = curWin.skey;
      }

      STimeWindow nextWin = curWin;
      GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

      // does the following time window still overlap with the current data block?
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey <= blockInfo.window.ekey)
                                                    : (nextWin.ekey >= blockInfo.window.skey);

      if (pQuery->limit.offset == 0) {
        if (!nextInBlock) {  // the next window starts in another block, only move the start timestamp
          *start = nextWin.skey;
          pQuery->window.skey = nextWin.skey;
          pWinInfo->prevSKey = nextWin.skey;
          return true;
        }

        // load the data block and check data remaining in current data block
        // TODO optimize performance
        SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

        nextWin = curWin;
        int32_t startPos =
            getNextQualifiedWindow(pRuntimeEnv, &nextWin, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
        assert(startPos >= 0);

        // set the abort info
        pQuery->pos = startPos;

        // reset the query start timestamp
        TSKEY firstKey = ((TSKEY *)pTsCol->pData)[startPos];
        pTableInfo->win.skey = firstKey;
        pQuery->window.skey = firstKey;
        *start = firstKey;

        pWinInfo->prevSKey = nextWin.skey;

        // curIndex is saved before applying the functions and written back afterwards
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;
        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      // offset is not 0, and next time window begins or ends in the next block.
      if (!nextInBlock) {
        break;
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      nextWin = curWin;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &nextWin, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      pTableInfo->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pWinInfo->prevSKey = nextWin.skey;
      curWin = nextWin;
    }
  }

  return true;
}

B
Bomin Zhang 已提交
4062 4063
/*
 * Create the tsdb query handle (pRuntimeEnv->pQueryHandle) for a query.
 *
 * No handle is created when only tags are queried, or for a super table query
 * that is neither an interval query nor a fixed output query.
 *
 * @param tsdb           tsdb instance passed on to the tsdbQuery* functions
 * @param pQInfo         query the handle is created for
 * @param isSTableQuery  true if the query is a super table query
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery) ||
      (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv)))) {
    return;
  }

  STsdbQueryCond cond = {
    .twindow   = pQuery->window,
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  // in this case the time window kept for the single table replaces the window of the query
  bool useTableWindow = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv));

  if (useTableWindow) {
    SArray*          firstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pTableInfo = taosArrayGetP(firstGroup, 0);
    cond.twindow = pTableInfo->win;
  }

  // pick the tsdb entry point that matches the type of the query
  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return;
  }

  if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
}

4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114
/*
 * Build the column descriptions used to fill the output of a query.
 *
 * One SFillColInfo is created for every output expression of the query. The
 * offset of a column is the sum of the bytes of all preceding expressions.
 *
 * @param pQuery  query providing numOfOutput, pSelectExpr and fillVal
 * @return newly allocated array with pQuery->numOfOutput entries, or NULL if
 *         the allocation failed. The array is allocated with calloc().
 *         NOTE(review): the code that releases the array is not visible here;
 *         confirm who owns it after it is handed to the fill module.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: do not write through a NULL pointer
    return NULL;
  }

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always be a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4123
/*
 * Initialize the runtime environment of a query before it is executed.
 *
 * The steps are: determine the basic properties of the query, create the tsdb
 * query handle, set up the runtime environment, create the disk based result
 * buffer and the window result info when they are needed, and create the fill
 * info if a fill type is specified.
 *
 * @param pQInfo         query to initialize
 * @param pTsBuf         ts buffer of the query, may be NULL
 * @param tsdb           tsdb instance the query runs on
 * @param vgId           id stored in pQInfo->vgId
 * @param isSTableQuery  true if the query is a super table query
 * @return TSDB_CODE_SUCCESS, or the error code of the step that failed
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  int32_t           code = TSDB_CODE_SUCCESS;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  // the ts buffer is traversed in ascending order iff its own order equals the order of the query
  if (pTsBuf != NULL) {
    int16_t order = (pQuery->order.order == pTsBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pTsBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  // super table query, group by normal column query and interval query all use the disk based result buffer
  if (isSTableQuery || pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!isSTableQuery) {
      int16_t type = pRuntimeEnv->groupbyNormalCol ? getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr)
                                                   : TSDB_DATA_TYPE_TIMESTAMP;

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    } else if (pQuery->intervalTime == 0) {
      // group by columns not tags, otherwise the group id is used
      int16_t type = pRuntimeEnv->groupbyNormalCol ? getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr)
                                                   : TSDB_DATA_TYPE_INT;

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 32, 4096, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo *pFillCols = taosCreateFillColInfo(pQuery);
    STimeWindow   alignedWin = TSWINDOW_INITIALIZER;

    // the time range handed to the alignment is always [min, max], regardless of the query order
    TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &alignedWin);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, alignedWin.skey, 0, pQuery->rec.capacity,
                                              pQuery->numOfOutput, pQuery->slidingTime, pQuery->slidingTimeUnit,
                                              pQuery->precision, pQuery->fillType, pFillCols);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4220
/*
 * Clear the "complete" flag of the result info of every output function, so
 * that the functions run again for the next table.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t i = 0;
  while (i < pQuery->numOfOutput) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    ++i;

    if (pInfo == NULL) {  // no result info for this function
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247
/*
 * Prepare the execution environment for one data block of a table.
 *
 * For an interval query the interval query range is set from the start key of
 * the block and, if tag results or a ts buffer are present, the additional
 * table info is set as well. For any other query the execution context of the
 * group of the table is set.
 *
 * @param pQInfo           query being executed
 * @param pTableQueryInfo  query info of the table the block belongs to
 * @param pBlockInfo       description of the data block
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // interval query
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4248
/*
 * Scan all data blocks returned by the query handle of the current scan
 * (the master handle during the master scan, the secondary handle otherwise)
 * and apply the query functions on them.
 *
 * The scan stops early when a block belongs to a table that is not found in
 * pQInfo->tableqinfoGroupInfo.map. If the query has been killed, control
 * leaves via longjmp with TSDB_CODE_TSC_QUERY_CANCELLED.
 *
 * @param pQInfo  query being executed
 * @return difference of taosGetTimestampMs() taken after and before the scan
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo*   pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT handle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // look up the query info of the table that owns this block
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    STableQueryInfo *pTableInfo = *ppTableInfo;

    pQuery->current = pTableInfo;
    CHECK_QUERY_TIME_RANGE(pQuery, pTableInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pTableInfo, &blockInfo);
    }

    SDataStatis *pStatis = NULL;
    SArray *     pDataBlock = NULL;

    // the block is discarded: only move lastKey of the table past it
    if (loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis, &pDataBlock) == BLK_DATA_DISCARD) {
      TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = boundary + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4301 4302
/*
 * Prepare the query on one table of the first table group.
 *
 * A new tsdb query handle that covers only the given table is created (an
 * existing handle is cleaned up first). The queried time range starts at
 * lastKey of the table and ends at the end key of its window.
 *
 * @param pQInfo  query being executed
 * @param index   index of the table inside the first table group
 * @return false if a ts buffer exists, its cursor has not been set yet
 *         (cur.vgroupIndex == -1) and no element is found for the tag value
 *         of the table; true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray *         firstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(firstGroup, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId* id = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // temporary group list that contains one group with the current table only
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableList = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(tableList, &pTableInfo->pTable);
  taosArrayPush(groupList, &tableList);

  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = groupList};

  // release the handle of the previous table, if any
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable, pQInfo);

  taosArrayDestroy(tableList);
  taosArrayDestroy(groupList);

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      int64_t tagVal = pRuntimeEnv->pCtx[0].tag.i64Key;
      STSElem first = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tagVal);

      // failed to find data with the specified tag value
      if (first.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4367
static void sequentialTableProcess(SQInfo *pQInfo) {
4368
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4369
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4370
  setQueryStatus(pQuery, QUERY_COMPLETED);
4371

H
Haojun Liao 已提交
4372
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4373

H
Haojun Liao 已提交
4374
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4375 4376
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4377

4378
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4379
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4380

4381
      qDebug("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4382
             numOfGroups, group);
H
Haojun Liao 已提交
4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4403
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4404
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4405
      } else {
H
Haojun Liao 已提交
4406
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4407
      }
H
Haojun Liao 已提交
4408 4409
      
      initCtxOutputBuf(pRuntimeEnv);
4410
      
4411
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4412 4413
      assert(taosArrayGetSize(s) >= 1);
      
4414
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4415 4416 4417
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4418

dengyihao's avatar
dengyihao 已提交
4419
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4420

H
Haojun Liao 已提交
4421
      // here we simply set the first table as current table
4422 4423 4424
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4425
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4438 4439 4440 4441 4442 4443

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4444
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4445
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4446
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4447

4448
      qDebug("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
B
Bomin Zhang 已提交
4470 4471
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
4472

4473
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4474 4475
      assert(taosArrayGetSize(s) >= 1);

4476
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4477 4478 4479 4480 4481 4482 4483 4484

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4485
      taosArrayDestroy(s); 
4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4500
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4501 4502 4503 4504 4505 4506 4507
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4508
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4509 4510 4511 4512 4513 4514

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4515 4516 4517
    }
  } else {
    /*
4518
     * 1. super table projection query, 2. ts-comp query
4519 4520 4521
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4522
    if (pQInfo->groupIndex > 0) {
4523
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4524
      pQuery->rec.total += pQuery->rec.rows;
4525

4526
      if (pQuery->rec.rows > 0) {
4527 4528 4529
        return;
      }
    }
4530

4531
    // all data have returned already
4532
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4533 4534
      return;
    }
4535

4536 4537
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4538

H
Haojun Liao 已提交
4539
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4540 4541
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4542

4543
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4544
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4545
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4546
      }
4547

4548
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4549
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4550
        pQInfo->tableIndex++;
4551 4552
        continue;
      }
4553

H
hjxilinx 已提交
4554
      // TODO handle the limit offset problem
4555
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4556
        //        skipBlocks(pRuntimeEnv);
4557 4558
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4559 4560 4561
          continue;
        }
      }
4562

4563
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4564
      skipResults(pRuntimeEnv);
4565

4566
      // the limitation of output result is reached, set the query completed
4567
      if (limitResults(pRuntimeEnv)) {
4568
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4569 4570
        break;
      }
4571

4572 4573
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4574

4575
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4576 4577 4578 4579 4580 4581
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4582
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4583

H
Haojun Liao 已提交
4584
        STableIdInfo tidInfo = {0};
4585

H
Haojun Liao 已提交
4586 4587 4588
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4589
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4590 4591
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4592
        // if the buffer is full or group by each table, we need to jump out of the loop
4593 4594
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4595 4596
          break;
        }
4597

4598
      } else {
4599
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4600 4601
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4602 4603
          continue;
        } else {
4604 4605 4606
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4607 4608 4609
        }
      }
    }
H
Haojun Liao 已提交
4610

4611
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4612 4613
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4614
  }
4615

4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4628
    finalizeQueryResult(pRuntimeEnv);
4629
  }
4630

4631 4632 4633
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4634

4635
  qDebug(
B
Bomin Zhang 已提交
4636
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4637
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4638
      pQuery->limit.offset);
4639 4640
}

4641 4642 4643 4644
/*
 * Prepare the runtime environment for the reverse scan: mark the scan as reversed, swap the
 * query window boundaries, flip the scan order, open a new secondary tsdb query handle for
 * the swapped window/flipped order and reset the query status to "not completed".
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery      = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // the ts buffer cursor follows the flipped scan order
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // release the previously opened secondary handle before it is replaced below
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  // query condition for the reverse pass: swapped window, flipped order, same columns
  STsdbQueryCond reverseCond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  pRuntimeEnv->prevGroupId     = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

4673 4674 4675 4676
/*
 * Leave reverse-scan mode: swap the query window boundaries back, flip the ts buffer cursor
 * order when a ts buffer is attached, call switchCtxOrder() and mark the runtime environment
 * as running the master scan.
 *
 * NOTE(review): doSaveContext() also flips pQuery->order.order, which is not flipped back
 * here -- confirm that this asymmetry is intentional.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery      = pRuntimeEnv->pQuery;

  // 1. restore the original [skey, ekey] orientation of the query window
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  // 2. the ts buffer cursor, if present, goes back to its previous order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // 3. flip the output contexts and return to the master scan
  switchCtxOrder(pRuntimeEnv);
  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
}

4687 4688 4689
/*
 * Close every time window result once a scan has finished.
 *
 * For interval queries each table keeps its own window result set, so the windows of every
 * table in every table group are closed; otherwise only the window result set held by the
 * runtime environment (the group result) is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // not an interval query: close results for group result
  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pTables    = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(pTables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableQueryInfo->windowResInfo);
    }
  }
}

/*
 * Abort the current execution if an error code has been recorded on the query or the query
 * has been killed: the results are finalized (cleaning up resources allocated during the
 * query) and control jumps back to pRuntimeEnv->env with TSDB_CODE_TSC_QUERY_CANCELLED.
 * Returns normally when the query is still healthy.
 */
static void abortMultiTableQueryOnFailure(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_SUCCESS && !IS_QUERY_KILLED(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
  finalizeQueryResult(&pQInfo->runtimeEnv);
  longjmp(pQInfo->runtimeEnv.env, TSDB_CODE_TSC_QUERY_CANCELLED);
}

/*
 * Execute a query over multiple tables in one pass: master scan, optional reverse scan,
 * then merge/copy of the results into the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is performed; the already computed results are only
 * copied into the output buffer.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery      = pRuntimeEnv->pQuery;

  // groupIndex > 0: the scan has been done before, only the remaining data has to be copied out
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  abortMultiTableQueryOnFailure(pQInfo);

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // the reverse scan may have failed or the query may have been killed in the meantime
  abortMultiTableQueryOnFailure(pQInfo);

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery))) {
    // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4790
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4791
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4792 4793
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4794 4795 4796 4797
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4798 4799
  pQuery->current = pTableInfo;  // set current query table info
  
4800
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4801
  finalizeQueryResult(pRuntimeEnv);
4802

H
Haojun Liao 已提交
4803
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4804 4805
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4806
  }
4807

H
Haojun Liao 已提交
4808
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4809
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4810

4811
  skipResults(pRuntimeEnv);
4812
  limitResults(pRuntimeEnv);
4813 4814
}

H
hjxilinx 已提交
4815
/*
 * Process a query on one table that may produce many output rows. Data blocks are scanned
 * repeatedly until at least one row is available or the query is completed; the limit is
 * then applied. When the query completes, the table's (uid, tid, lastKey) triple is
 * appended to pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery      = pRuntimeEnv->pQuery;

  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(pRuntimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);

    // with filter columns the offset has to be consumed from the produced rows
    bool offsetPending = (pQuery->limit.offset > 0) && (pQuery->numOfFilterCols > 0) && (pQuery->rec.rows > 0);
    if (offsetPending) {
      skipResults(pRuntimeEnv);
    }

    // stop as soon as rows are available or nothing is left to scan; otherwise every row
    // of this round was consumed by the offset and another round is needed
    bool finished = (pQuery->rec.rows > 0) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED);
    if (finished) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // record the last key reached on this table
    STableId    *pTableId = TSDB_TABLEID(pQuery->current);
    STableIdInfo lastKeyInfo;

    lastKeyInfo.uid = pTableId->uid;
    lastKeyInfo.tid = pTableId->tid;
    lastKeyInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &lastKeyInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
4875
/*
 * Scan the data blocks of the current table starting at `start` and finalize the results,
 * repeating until the query status reports QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * When no fill is requested and a filter or ts buffer is involved, a pending offset is
 * consumed here by dropping closed time windows from the front of the window result set.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool hasFilterOrTsBuf = (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    if (hasFilterOrTsBuf && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // never drop more windows than the remaining offset
      int32_t dropped = MIN(closedWindows, pQuery->limit.offset);
      clearFirstNTimeWindow(pRuntimeEnv, dropped);
      pQuery->limit.offset -= dropped;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4902
// handle time interval query on table
H
hjxilinx 已提交
4903
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4904 4905
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4906 4907
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4908

H
Haojun Liao 已提交
4909
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
4910 4911
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4912
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4913
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4914
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4915 4916 4917 4918
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4919
  while (1) {
H
Haojun Liao 已提交
4920
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4921

H
Haojun Liao 已提交
4922
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4923
      pQInfo->groupIndex = 0;  // always start from 0
4924
      pQuery->rec.rows = 0;
4925
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4926

4927
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4928
    }
4929

4930
    // the offset is handled at prepare stage if no interpolation involved
4931
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4932
      limitResults(pRuntimeEnv);
4933 4934
      break;
    } else {
H
Haojun Liao 已提交
4935
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
4936
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
4937
      numOfFilled = 0;
4938
      
H
Haojun Liao 已提交
4939
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
4940
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4941
        limitResults(pRuntimeEnv);
4942 4943
        break;
      }
4944

4945
      // no result generated yet, continue retrieve data
4946
      pQuery->rec.rows = 0;
4947 4948
    }
  }
4949

4950
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
4951
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
4952
    pQInfo->groupIndex = 0;
4953
    pQuery->rec.rows = 0;
4954
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4955
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4956
  }
4957

H
Haojun Liao 已提交
4958
  pQInfo->pointsInterpo += numOfFilled;
4959 4960
}

4961 4962 4963 4964
/*
 * Entry point for a query on exactly one table.
 *
 * Results left over from a previous round are returned first (through the fill routine
 * when a fill type is set, otherwise from the window result set). If nothing is left, the
 * query is dispatched to the interval, fixed-output or multi-output processor and the
 * elapsed time is added to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery      = pRuntimeEnv->pQuery;

  if (queryHasRemainResults(pRuntimeEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * Some results have not been returned yet because of result interpolation, so stay
       * in this procedure instead of launching the retrieve procedure for the next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pRuntimeEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    pQuery->rec.rows   = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pRuntimeEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // no window results remain
        if (pRuntimeEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo  = taosArrayGetP(pFirstGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTableInfo);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, pTableInfo);
  } else {
    // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5026
/*
 * Entry point for a super table query.
 *
 * Interval queries, and fixed-output queries that are neither point interpolation,
 * group-by-normal-column nor first/last-row queries, go through multiTableQueryProcess();
 * everything else is handled by sequentialTableProcess(). The elapsed time is added to the
 * runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery      = pRuntimeEnv->pQuery;

  pQuery->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess = QUERY_IS_INTERVAL_QUERY(pQuery);
  if (!useMultiTableProcess) {
    useMultiTableProcess = isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) &&
                           !pRuntimeEnv->groupbyNormalCol && !isFirstLastRowQuery(pQuery);
  }

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5049
/*
 * Locate the source column referenced by an expression.
 *
 * @param pQueryMsg  query message; supplies numOfTags, numOfCols and colList
 * @param pExprMsg   expression whose colInfo (flag, colId) is looked up
 * @param pTagCols   tag column list, searched when the expression refers to a tag
 * @return index into pTagCols (tag expression) or pQueryMsg->colList (normal column);
 *         -1 when the expression refers to the table name pseudo column
 *         (TSDB_TBNAME_COLUMN_INDEX), and also -1 when the column id is not found in a build
 *         where assertions are compiled out.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    // the table name pseudo column has no slot in the tag column list
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  // The column id must exist in the source list. The explicit return keeps the function
  // well-defined when NDEBUG removes the assertion; previously control fell off the end of
  // a non-void function.
  assert(0);
  return -1;
}

5078 5079 5080
/*
 * Check that the index returned by getColumnIndexInSource() for the expression is below the
 * number of normal columns or below the number of tag columns of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t sourceIndex = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (sourceIndex < pQueryMsg->numOfCols) {
    return true;
  }

  return sourceIndex < pQueryMsg->numOfTags;
}

5083
/*
 * Sanity-check the scalar header fields of a query message. The first violated rule is
 * logged and makes the function return false:
 *   - intervalTime must not be negative
 *   - numOfTables must be positive
 *   - numOfGroupCols must not be negative
 *   - numOfOutput must be within [1, TSDB_MAX_COLUMNS]
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the number of source columns (normal columns plus tags) of a query message.
 *
 * @param pQueryMsg  query message supplying numOfCols, numOfTags and numOfOutput
 * @param pExprMsg   array of numOfOutput expression descriptors
 * @return false when a count is negative, when the combined count exceeds TSDB_MAX_COLUMNS,
 *         or when no source column is supplied although at least one output expression
 *         needs one; true otherwise.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  // Bound each count on its own before combining them: summing two unchecked values taken
  // from the message could overflow the signed sum and slip past the TSDB_MAX_COLUMNS limit.
  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 ||
      pQueryMsg->numOfCols > TSDB_MAX_COLUMNS ||
      pQueryMsg->numOfTags > TSDB_MAX_COLUMNS - pQueryMsg->numOfCols) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  // both counts are now within [0, TSDB_MAX_COLUMNS], so the sum cannot overflow
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;
  if (numOfTotal != 0) {
    return true;
  }

  // Without any source column, only these expressions are acceptable: tag projection,
  // and tid_tag/count applied to the table name pseudo column.
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    bool onTbname = (pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed  = (pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
                    (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                    (pFuncMsg->functionId == TSDB_FUNC_COUNT && onTbname);
    if (!allowed) {
      return false;
    }
  }

  return true;
}

5129
/*
 * Decode the table id records of a query message.
 *
 * pMsg points at pQueryMsg->numOfTables consecutive STableIdInfo records. Each record has
 * its tid/uid/key fields byte-swapped in place and is then appended to a newly created
 * array stored in *pTableIdList.
 *
 * @return pointer to the first byte after the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pRecord = (STableIdInfo *)pMsg;
  for (int32_t remaining = pQueryMsg->numOfTables; remaining > 0; --remaining) {
    pRecord->tid = htonl(pRecord->tid);
    pRecord->uid = htobe64(pRecord->uid);
    pRecord->key = htobe64(pRecord->key);

    taosArrayPush(*pTableIdList, pRecord);

    // advance to the next record in the message
    pMsg += sizeof(STableIdInfo);
    pRecord = (STableIdInfo *)pMsg;
  }

  return pMsg;
}
5147

5148
/**
H
hjxilinx 已提交
5149
 * pQueryMsg->head has been converted before this function is called.
5150
 *
H
hjxilinx 已提交
5151
 * @param pQueryMsg
5152 5153 5154 5155
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5156
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5157
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5158 5159
  int32_t code = TSDB_CODE_SUCCESS;

5160 5161 5162 5163 5164 5165 5166 5167
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5168

5169 5170
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5171
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5172
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5173 5174

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5175
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5176
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5177 5178 5179
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5180
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5181
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5182
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5183

5184
  // query msg safety check
5185
  if (!validateQueryMsg(pQueryMsg)) {
5186 5187
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5188 5189
  }

H
hjxilinx 已提交
5190 5191
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5192 5193
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5194
    pColInfo->colId = htons(pColInfo->colId);
5195
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5196 5197
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5198

H
hjxilinx 已提交
5199
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5200

H
hjxilinx 已提交
5201
    int32_t numOfFilters = pColInfo->numOfFilters;
5202
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5203
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5204 5205 5206
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5207 5208 5209 5210
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5211 5212 5213

      pMsg += sizeof(SColumnFilterInfo);

5214 5215
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5216

5217
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5218 5219
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5220
      } else {
5221 5222
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5223 5224
      }

5225 5226
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5227 5228 5229
    }
  }

5230 5231
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5232

5233
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5234
    (*pExpr)[i] = pExprMsg;
5235

5236
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5237 5238 5239 5240
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5241

5242
    pMsg += sizeof(SSqlFuncMsg);
5243 5244

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5245
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5246 5247 5248 5249
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5250
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5251 5252 5253 5254 5255
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5256 5257
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5258
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5259 5260
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5261 5262
      }
    } else {
5263
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5264
//        return TSDB_CODE_QRY_INVALID_MSG;
5265
//      }
5266 5267
    }

5268
    pExprMsg = (SSqlFuncMsg *)pMsg;
5269
  }
5270

5271
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5272
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5273
    goto _cleanup;
5274
  }
5275

H
hjxilinx 已提交
5276
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5277

H
hjxilinx 已提交
5278
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5279
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5280 5281 5282 5283
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5284 5285 5286

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5287
      pMsg += sizeof((*groupbyCols)[i].colId);
5288 5289

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5290 5291
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5292
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5293 5294 5295 5296 5297
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5298

H
hjxilinx 已提交
5299 5300
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5301 5302
  }

5303 5304
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5305
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5306 5307

    int64_t *v = (int64_t *)pMsg;
5308
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5309 5310
      v[i] = htobe64(v[i]);
    }
5311

5312
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5313
  }
5314

5315 5316 5317 5318
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5319

5320 5321 5322 5323
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5324

5325
      (*tagCols)[i] = *pTagCol;
5326
      pMsg += sizeof(SColumnInfo);
5327
    }
H
hjxilinx 已提交
5328
  }
5329

5330 5331 5332 5333 5334 5335
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5336

weixin_48148422's avatar
weixin_48148422 已提交
5337
  if (*pMsg != 0) {
5338
    size_t len = strlen(pMsg) + 1;
5339

5340
    *tbnameCond = malloc(len);
5341 5342 5343 5344 5345
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5346
    strcpy(*tbnameCond, pMsg);
5347
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5348
  }
5349

5350
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5351 5352
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5353
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5354
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5355 5356

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5357 5358 5359 5360 5361 5362 5363 5364 5365

_cleanup:
  tfree(*pExpr);
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
5366 5367

  return code;
5368 5369
}

H
hjxilinx 已提交
5370
/**
 * Build the arithmetic expression tree for one output column.
 *
 * The serialized expression is taken from the first argument of the expression
 * message (arg[0].argValue.pz, arg[0].argBytes) and decoded by exprTreeFromBinary().
 * On success the resulting tree is stored in pArithExprInfo->pExpr; the caller is
 * responsible for destroying it later.
 *
 * @param pArithExprInfo expression descriptor; its base message must already be filled in
 * @param pQueryMsg      query message, only used as an identifier in the log output
 * @return TSDB_CODE_SUCCESS, the code caught by the CATCH clause, or
 *         TSDB_CODE_QRY_APP_ERROR when the decoder returns no tree
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pExprNode = NULL;

  // decoding may raise an error through the TRY/CATCH macros instead of returning a code
  TRY(TSDB_MAX_TAGS) {
    pExprNode = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pExprNode == NULL) {
    qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
    return TSDB_CODE_QRY_APP_ERROR;
  }

  pArithExprInfo->pExpr = pExprNode;
  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
5391
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5392 5393
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5394
  int32_t code = TSDB_CODE_SUCCESS;
5395

H
Haojun Liao 已提交
5396
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5397
  if (pExprs == NULL) {
5398
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5399 5400 5401 5402 5403
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5404
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5405
    pExprs[i].base = *pExprMsg[i];
5406
    pExprs[i].bytes = 0;
5407 5408 5409 5410

    int16_t type = 0;
    int16_t bytes = 0;

5411
    // parse the arithmetic expression
5412
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5413
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5414

5415 5416 5417
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5418 5419
      }

5420
      type  = TSDB_DATA_TYPE_DOUBLE;
5421
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5422
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5423 5424 5425
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5426
    } else{
5427
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5428
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5429

dengyihao's avatar
dengyihao 已提交
5430
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5431 5432 5433 5434
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5435
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5436

H
Haojun Liao 已提交
5437 5438 5439
        type  = s.type;
        bytes = s.bytes;
      }
5440 5441
    }

5442 5443
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5444
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5445
      tfree(pExprs);
5446
      return TSDB_CODE_QRY_INVALID_MSG;
5447 5448
    }

5449
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5450
      tagLen += pExprs[i].bytes;
5451
    }
5452
    assert(isValidDataType(pExprs[i].type));
5453 5454 5455
  }

  // TODO refactor
5456
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5457 5458
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5459

5460
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5461
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5462 5463 5464 5465 5466
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5467
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5468
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5469 5470 5471
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5472
  *pExprInfo = pExprs;
5473 5474 5475 5476

  return TSDB_CODE_SUCCESS;
}

5477
/**
 * Create the group-by descriptor from the query message.
 *
 * @param pQueryMsg query message holding numOfGroupCols, orderType and orderByIdx
 * @param pColIndex array of numOfGroupCols column indexes to group by
 * @param code      [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure,
 *                  left untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by columns or
 *         when an allocation failed (distinguish the two through *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure to the caller instead
    tfree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5501
/**
 * Build pQuery->pFilterInfo from the per-column filters stored in pQuery->colList.
 *
 * Every column with at least one filter gets one SSingleColumnFilterInfo entry; every
 * filter of that column gets a comparison callback chosen from the range-filter table
 * (both a lower and an upper bound are present) or from the value-filter table
 * (a single relation).
 *
 * @param pQInfo query handle, only used as an identifier in the log output
 * @param pQuery query whose colList is read and whose numOfFilterCols/pFilterInfo are set
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG.
 *         Memory allocated before a failure stays attached to pQuery for the caller
 *         to release.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array so cleanup code never walks it
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];
    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;

    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: the slot depends on which bounds are inclusive
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

5588
/**
 * Resolve the column index of every output expression.
 *
 * For each non-arithmetic expression the referenced column id is looked up in
 * pQuery->colList (normal columns) or pQuery->tagColList (tag columns) and the
 * position found is written to colInfo.colIndex. A tag expression that matches no
 * tag column is only accepted when it refers to TSDB_TBNAME_COLUMN_INDEX.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before looking at its members
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5623

5624
/**
 * Three-way comparison of two STableIdInfo entries by uid.
 * Used in this file as the comparator for taosArraySort()/taosArraySearch().
 *
 * @return 1 when a's uid is greater, -1 when it is smaller, 0 when both are equal
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* x = (const STableIdInfo*)a;
  const STableIdInfo* y = (const STableIdInfo*)b;

  if (x->uid > y->uid) {
    return 1;
  }

  if (x->uid < y->uid) {
    return -1;
  }

  return 0;
}

dengyihao's avatar
dengyihao 已提交
5632 5633
static void freeQInfo(SQInfo *pQInfo);

weixin_48148422's avatar
weixin_48148422 已提交
5634
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5635
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
B
Bomin Zhang 已提交
5636 5637 5638
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

5639 5640
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
B
Bomin Zhang 已提交
5641
    goto _cleanup_qinfo;
5642
  }
B
Bomin Zhang 已提交
5643 5644 5645
  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;
5646 5647

  SQuery *pQuery = calloc(1, sizeof(SQuery));
B
Bomin Zhang 已提交
5648 5649 5650
  if (pQuery == NULL) {
    goto _cleanup_query;
  }
5651 5652
  pQInfo->runtimeEnv.pQuery = pQuery;

5653
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5654
  pQuery->numOfOutput     = numOfOutput;
5655 5656 5657
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5658
  pQuery->order.orderColId = pQueryMsg->orderColId;
5659 5660 5661 5662
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5663
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5664
  pQuery->fillType        = pQueryMsg->fillType;
5665
  pQuery->numOfTags       = pQueryMsg->numOfTags;
B
Bomin Zhang 已提交
5666
  pQuery->tagColList      = pTagCols;
5667
  
5668
  // todo do not allocate ??
5669
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5670
  if (pQuery->colList == NULL) {
5671
    goto _cleanup;
5672
  }
5673

H
hjxilinx 已提交
5674
  for (int16_t i = 0; i < numOfCols; ++i) {
5675
    pQuery->colList[i] = pQueryMsg->colList[i];
5676
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5677
  }
5678

5679
  // calculate the result row size
5680 5681 5682
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5683
  }
5684

5685
  doUpdateExprColumnIndex(pQuery);
5686

5687
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5688
  if (ret != TSDB_CODE_SUCCESS) {
5689
    goto _cleanup;
5690 5691 5692
  }

  // prepare the result buffer
5693
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5694
  if (pQuery->sdata == NULL) {
5695
    goto _cleanup;
5696 5697
  }

H
hjxilinx 已提交
5698
  // set the output buffer capacity
H
hjxilinx 已提交
5699
  pQuery->rec.capacity = 4096;
5700
  pQuery->rec.threshold = 4000;
5701

5702
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5703
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5704 5705

    // allocate additional memory for interResults that are usually larger then final results
5706 5707
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5708
    if (pQuery->sdata[col] == NULL) {
5709
      goto _cleanup;
5710 5711 5712
    }
  }

5713
  if (pQuery->fillType != TSDB_FILL_NONE) {
5714 5715
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5716
      goto _cleanup;
5717 5718 5719
    }

    // the first column is the timestamp
5720
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5721 5722
  }

dengyihao's avatar
dengyihao 已提交
5723 5724 5725 5726 5727 5728
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5729 5730 5731
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
  }
5732

weixin_48148422's avatar
weixin_48148422 已提交
5733 5734
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5735
  taosArraySort(pTableIdList, compareTableIdInfo);
5736

H
Haojun Liao 已提交
5737
  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
H
Haojun Liao 已提交
5738 5739 5740
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5741
  for(int32_t i = 0; i < numOfGroups; ++i) {
5742
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5743

H
Haojun Liao 已提交
5744
    size_t s = taosArrayGetSize(pa);
5745
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
B
Bomin Zhang 已提交
5746 5747 5748 5749
    if (p1 == NULL) {
      goto _cleanup;
    }
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
5750

H
hjxilinx 已提交
5751
    for(int32_t j = 0; j < s; ++j) {
5752
      void* pTable = taosArrayGetP(pa, j);
H
Haojun Liao 已提交
5753
      STableId* id = TSDB_TABLEID(pTable);
5754

H
Haojun Liao 已提交
5755
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5756 5757 5758
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5759
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5760
      }
5761

H
Haojun Liao 已提交
5762 5763
      void* buf = pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, pTable, window, buf);
B
Bomin Zhang 已提交
5764 5765 5766
      if (item == NULL) {
        goto _cleanup;
      }
5767
      item->groupIndex = i;
H
hjxilinx 已提交
5768
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5769 5770
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5771 5772
    }
  }
5773

weixin_48148422's avatar
weixin_48148422 已提交
5774 5775
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5776
  pQuery->pos = -1;
5777
  pQuery->window = pQueryMsg->window;
5778

5779
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
5780 5781
    int32_t code = TAOS_SYSTEM_ERROR(errno);
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
5782
    goto _cleanup;
5783
  }
5784

5785
  colIdCheck(pQuery);
5786

5787
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5788 5789
  return pQInfo;

B
Bomin Zhang 已提交
5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804
_cleanup_qinfo:
  tsdbDestoryTableGroup(pTableGroupInfo);

_cleanup_query:
  taosArrayDestroy(pGroupbyExpr->columnInfo);
  tfree(pGroupbyExpr);
  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }
  tfree(pExprs);

5805
_cleanup:
dengyihao's avatar
dengyihao 已提交
5806
  freeQInfo(pQInfo);
5807 5808 5809
  return NULL;
}

H
hjxilinx 已提交
5810
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5811 5812 5813 5814
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5815

H
hjxilinx 已提交
5816 5817 5818 5819
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5820
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5821 5822 5823
  return (sig == (uint64_t)pQInfo);
}

H
Haojun Liao 已提交
5824
/**
 * Second stage of query handle construction.
 *
 * Loads the optional timestamp-comparison buffer carried in the message, finishes
 * the query immediately when the time window is empty for the scan order or when no
 * table qualified, and otherwise hands over to doInitQInfo().
 *
 * @param pQueryMsg query message
 * @param tsdb      storage engine handle, forwarded to doInitQInfo()
 * @param vgId      forwarded to doInitQInfo()
 * @param pQInfo    query handle to initialise
 * @param isSTable  forwarded to doInitQInfo()
 * @param param     opaque value stored in pQInfo->param
 * @return TSDB_CODE_SUCCESS, or the code returned by doInitQInfo(); in the latter
 *         case pQInfo has already been released through freeQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // NOTE(review): the two early returns below do not release pTSBuf - confirm who owns it

  // the window is empty for the requested scan order: nothing to return
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  pQInfo->param = param;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // filter the qualified
  code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decrease during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

B
Bomin Zhang 已提交
5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882
/**
 * Release an array of column filters.
 *
 * For every entry whose filterstr flag is set, the buffer referenced by pz is freed
 * first; afterwards the array itself is freed. A NULL array is ignored.
 *
 * @param pFilter      filter array, may be NULL
 * @param numOfFilters number of entries in pFilter
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
    if (pFilter != NULL) {
      int32_t idx = 0;

      while (idx < numOfFilters) {
        SColumnFilterInfo* pCur = &pFilter[idx];
        idx += 1;

        if (pCur->filterstr) {
          free((void*)(pCur->pz));
        }
      }

      free(pFilter);
    }
}

H
hjxilinx 已提交
5883 5884 5885 5886
/**
 * Release a query handle and everything it owns.
 *
 * The function is also used to clean up handles that were only partially built, so
 * every member is checked before it is walked: the SQuery may be missing, and the
 * result pages or the filter array may not have been allocated yet.
 * Handles that fail isValidQInfo() are ignored.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  const int32_t numOfOutput = (pQuery != NULL) ? pQuery->numOfOutput : 0;

  setQueryKilled(pQInfo);

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  // result pages, one per output column
  if (pQuery != NULL && pQuery->sdata != NULL) {
    for (int32_t col = 0; col < numOfOutput; ++col) {
      tfree(pQuery->sdata[col]);
    }
  }

  sem_destroy(&(pQInfo->dataReady));

  // NOTE(review): assumes teardownQueryRuntimeEnv() tolerates a runtime env without a query - confirm
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  if (pQuery != NULL) {
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          tfree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < numOfOutput; ++i) {
        SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      tfree(pQuery->pSelectExpr);
    }

    tfree(pQuery->fillVal);
  }

  // todo refactor, extract method to destroytableDataInfo
  if (pQInfo->tableqinfoGroupInfo.pGroupList != NULL) {
    int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
    for (int32_t i = 0; i < numOfGroups; ++i) {
      SArray *p = GET_TABLEGROUP(pQInfo, i);

      size_t num = taosArrayGetSize(p);
      for(int32_t j = 0; (size_t)j < num; ++j) {
        STableQueryInfo* item = taosArrayGetP(p, j);
        if (item != NULL) {
          destroyTableQueryInfo(item, numOfOutput);
        }
      }

      taosArrayDestroy(p);
    }
  }

  tfree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestoryTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery != NULL) {
    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo* column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      tfree(pQuery->colList);
    }

    tfree(pQuery->sdata);
    tfree(pQuery);
  }

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5973
/**
 * Compute the size in bytes of the result that is about to be returned.
 *
 * @param pQInfo    query handle
 * @param numOfRows [in/out] number of result rows; for a ts-comp query with rows it is
 *                  overwritten with the size of the result file
 * @return the payload size in bytes; 0 when the result file of a ts-comp query
 *         cannot be inspected
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!(isTSCompQuery(pQuery) && (*numOfRows) > 0)) {
    return pQuery->rowSize * (*numOfRows);
  }

  /*
   * get the file size and set the numOfRows to be the file size, since for tsComp query,
   * the returned row size is equalled to 1
   * TODO handle the case that the file is too large to send back one time
   */
  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) == 0) {
    *numOfRows = fileStat.st_size;
    return fileStat.st_size;
  }

  qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
  return 0;
}
5994

H
hjxilinx 已提交
5995 5996 5997
/**
 * Copy the current batch of query results into the response buffer.
 *
 * For a ts-comp query the result is the content of the temporary file whose path is
 * stored in sdata[0]->data; the file is read into data and then removed. For all
 * other queries doCopyQueryResultToMsg() performs the copy. Afterwards the running
 * total is updated and the query is marked QUERY_OVER once the limit is reached.
 *
 * @param pQInfo query handle
 * @param data   destination buffer; the caller must have sized it for the result
 * @return always TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int32_t s = lseek(fd, 0, SEEK_END);
      UNUSED(s);
      qDebug("QInfo:%p ts comp data return, file:%s, size:%d", pQInfo, pQuery->sdata[0]->data, s);

      // never hand a failed seek result (-1) to read() as the byte count
      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        ssize_t sz = read(fd, data, (size_t)s);
        UNUSED(sz);
      } else {
        // todo handle error
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6045 6046 6047 6048 6049 6050 6051
/* Bookkeeping for the query handles of one vgId. */
struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // id passed in when the manager is created - presumably the vnode id, TODO confirm
  bool            closed;         // presumably set once the manager no longer accepts queries - TODO confirm
  pthread_mutex_t lock;           // presumably guards the fields above - TODO confirm
};

typedef struct SQueryMgmt SQueryMgmt;

H
Haojun Liao 已提交
6052
/**
 * Create a query handle from a query message.
 *
 * Steps: decode the message (convertQueryMsg), build the output expressions and the
 * group-by descriptor, resolve the set of tables to scan from the storage engine,
 * create the handle (createQInfoImpl) and initialise it (initQInfo).
 *
 * @param tsdb      storage engine handle, must not be NULL
 * @param vgId      forwarded to initQInfo()
 * @param pQueryMsg query message, must not be NULL; its column filters are released
 *                  before returning
 * @param param     opaque value stored in the handle
 * @param pQInfo    [out] receives the new handle; set to NULL when an error is returned
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;
  SExprInfo     *pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        if (code == TSDB_CODE_QRY_EXCEED_TAGS_LIMIT) {
          qError("qmsg:%p failed to QueryStable, reason: %s", pQueryMsg, tstrerror(code));
        }
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }
  } else {
    assert(0);
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // createQInfoImpl() took over these objects, whether it succeeded or not
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);

  // still owned here only when a step after expression creation failed:
  // release the arithmetic expression trees before the array that holds them
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }
    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6174
/**
 * Destroy a query handle.
 *
 * Logs that the query completed, prints the query cost summary and frees the
 * SQInfo object. A handle that does not pass isValidQInfo() is ignored.
 *
 * @param qHandle opaque query handle (an SQInfo pointer)
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo *pInfo = (SQInfo *)qHandle;

  if (isValidQInfo(pInfo)) {
    qDebug("QInfo:%p query completed", pInfo);
    queryCostStatis(pInfo);  // print the query cost summary
    freeQInfo(pInfo);
  }
}

6185
/**
 * Execute one round of the query behind the given handle.
 *
 * Every exit path taken after the handle has been validated posts the
 * dataReady semaphore, so a retriever blocked on it is always released.
 * A non-zero return from setjmp (i.e. a longjmp out of the query code) is
 * stored in pQInfo->code as the error code of the query.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  // a handle whose signature does not match its own address is treated as freed
  if (pInfo == NULL || pInfo->signature != pInfo) {
    qDebug("QInfo:%p has been freed, no need to execute", pInfo);
    return;
  }

  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pInfo);
    sem_post(&pInfo->dataReady);
    return;
  }

  if (pInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pInfo);
    sem_post(&pInfo->dataReady);
    return;
  }

  SQueryRuntimeEnv *pEnv = &pInfo->runtimeEnv;

  // an error occurred inside the query code: record the code and return it to the client
  int32_t jmpCode = setjmp(pEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pInfo, tstrerror(pInfo->code));
    sem_post(&pInfo->dataReady);
    return;
  }

  qDebug("QInfo:%p query task is launched", pInfo);

  // dispatch to the matching executor: tag-only, super table, or normal table
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pInfo);  // todo support the limit/offset
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pInfo);
  } else {
    tableQueryImpl(pInfo);
  }

  SQuery *pCurQuery = pEnv->pQuery;
  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p query is killed", pInfo);
  } else if (pCurQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %zu tables queried, %" PRId64 " rows are returned", pInfo,
           pInfo->tableqinfoGroupInfo.numOfTables, pCurQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows", pInfo,
           pCurQuery->rec.rows, pCurQuery->rec.total + pCurQuery->rec.rows);
  }

  sem_post(&pInfo->dataReady);
}

H
hjxilinx 已提交
6239
/**
 * Wait until the current round of the query has produced its result and
 * report the query status.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 *         the code stored in the query object. A killed query returns its
 *         code immediately without waiting on the dataReady semaphore.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pInfo, pInfo->code);
    return pInfo->code;
  }

  SQuery *pCurQuery = pInfo->runtimeEnv.pQuery;

  // block until the executor signals that the result of this round is ready
  sem_wait(&pInfo->dataReady);
  qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%d", pInfo, pCurQuery->rowSize,
         pCurQuery->rec.rows, pInfo->code);

  return pInfo->code;
}
6258

H
hjxilinx 已提交
6259
/**
 * Tell whether the client still has results to fetch from this query.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return false for an invalid handle, for a query whose code is not
 *         TSDB_CODE_SUCCESS, or when the status is QUERY_OVER; true when the
 *         status is QUERY_RESBUF_FULL or QUERY_COMPLETED.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // validate first: the fields of an invalid (possibly NULL) handle must not be read, not even for logging
  if (!isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p invalid qhandle, abort query", pQInfo);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qDebug("QInfo:%p error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool ret = false;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    ret = true;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);  // no other status is expected at this point
  }

  if (ret) {
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6287 6288 6289
/**
 * Build the retrieve response message for the current round of the query.
 *
 * The message is allocated with rpcMallocCont(); its length covers the
 * SRetrieveTableRsp header, the result rows reported by getResultSize(), one
 * extra int32_t and one STableIdInfo per entry of pQInfo->arrTableIdInfo.
 *
 * @param qinfo   opaque query handle (an SQInfo pointer)
 * @param pRsp    [out] receives the allocated response, or NULL if the allocation failed
 * @param contLen [out] receives the response length in bytes, or 0 if the allocation failed
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY if the response cannot be allocated,
 *         otherwise the result of doDumpQueryResult() or the query's own code.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp));

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    // never hand a non-zero length together with a NULL buffer back to the caller
    *contLen = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // nothing to dump (no rows or the query failed): the query is over
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6328

H
Haojun Liao 已提交
6329
/**
 * Mark the query behind the given handle as killed.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_SUCCESS once the query is flagged via setQueryKilled(),
 *         TSDB_CODE_QRY_INVALID_QHANDLE for a NULL or invalid handle.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo != NULL && isValidQInfo(pInfo)) {
    setQueryKilled(pInfo);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

H
hjxilinx 已提交
6340 6341 6342
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
6343

H
Haojun Liao 已提交
6344
  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
H
Haojun Liao 已提交
6345
  assert(numOfGroup == 0 || numOfGroup == 1);
6346

H
Haojun Liao 已提交
6347
  if (numOfGroup == 0) {
6348 6349
    return;
  }
H
hjxilinx 已提交
6350
  
H
Haojun Liao 已提交
6351
  SArray* pa = GET_TABLEGROUP(pQInfo, 0);
6352

H
Haojun Liao 已提交
6353
  size_t num = taosArrayGetSize(pa);
6354
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);
6355

H
Haojun Liao 已提交
6356
  int32_t count = 0;
6357 6358 6359
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);
6360

6361 6362
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
H
Haojun Liao 已提交
6363
    count = 0;
6364

H
Haojun Liao 已提交
6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

H
Haojun Liao 已提交
6376 6377
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6378
      STableQueryInfo *item = taosArrayGetP(pa, i);
6379

6380
      char *output = pQuery->sdata[0]->data + i * rsize;
6381
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);
6382

6383
      output = varDataVal(output);
H
Haojun Liao 已提交
6384
      STableId* id = TSDB_TABLEID(item->pTable);
6385

H
Haojun Liao 已提交
6386 6387
      *(int64_t *)output = id->uid;  // memory align problem, todo serialize
      output += sizeof(id->uid);
6388

H
Haojun Liao 已提交
6389 6390
      *(int32_t *)output = id->tid;
      output += sizeof(id->tid);
6391

6392
      *(int32_t *)output = pQInfo->vgId;
6393
      output += sizeof(pQInfo->vgId);
6394

6395
      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6396
        char *data = tsdbGetTableName(item->pTable);
6397
        memcpy(output, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6398
      } else {
6399
        char *val = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
6400 6401 6402 6403 6404 6405 6406 6407

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
H
[td-90]  
Haojun Liao 已提交
6408
        } else {
6409 6410
          if (val == NULL) {
            setNull(output, type, bytes);
H
Haojun Liao 已提交
6411
          } else {  // todo here stop will cause client crash
6412 6413
            memcpy(output, val, bytes);
          }
H
[td-90]  
Haojun Liao 已提交
6414 6415
        }
      }
6416

H
Haojun Liao 已提交
6417
      count += 1;
6418
    }
6419

6420
    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);
6421

H
Haojun Liao 已提交
6422 6423 6424 6425 6426
  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
6427
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
6428
  } else {  // return only the tags|table name etc.
H
Haojun Liao 已提交
6429
    count = 0;
H
Haojun Liao 已提交
6430
    SSchema tbnameSchema = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6431 6432
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6433

6434
      SExprInfo* pExprInfo = pQuery->pSelectExpr;
6435
      STableQueryInfo* item = taosArrayGetP(pa, i);
6436

6437 6438
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6439
          char* data = tsdbGetTableName(item->pTable);
H
Haojun Liao 已提交
6440
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
H
hjxilinx 已提交
6441
          memcpy(dst, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6442 6443 6444 6445
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;
          
6446
          char* data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
H
Haojun Liao 已提交
6447
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
6448

H
hjxilinx 已提交
6449
          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
[td-90]  
Haojun Liao 已提交
6450 6451 6452 6453 6454
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
H
hjxilinx 已提交
6455
          } else {
H
[td-90]  
Haojun Liao 已提交
6456 6457 6458 6459 6460
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
H
hjxilinx 已提交
6461
          }
6462
        }
H
hjxilinx 已提交
6463
      }
H
Haojun Liao 已提交
6464
      count += 1;
H
hjxilinx 已提交
6465
    }
6466

6467
    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
H
hjxilinx 已提交
6468
  }
6469

H
Haojun Liao 已提交
6470
  pQuery->rec.rows = count;
H
hjxilinx 已提交
6471
  setQueryStatus(pQuery, QUERY_COMPLETED);
H
hjxilinx 已提交
6472 6473
}

6474 6475 6476 6477 6478 6479 6480
/**
 * Free callback registered with the query handle cache in qOpenQueryMgmt().
 *
 * @param qhandle address of a cache slot that stores a query handle; the
 *                stored query is killed and then destroyed. A NULL slot or
 *                a slot holding NULL is ignored.
 */
void freeqinfoFn(void *qhandle) {
  void **pSlot = qhandle;

  if (pSlot != NULL && *pSlot != NULL) {
    qKillQuery(*pSlot);
    qDestroyQueryInfo(*pSlot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
  const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

  SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt));

  pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryHandle->closed    = false;
  pthread_mutex_init(&pQueryHandle->lock, NULL);

  qDebug("vgId:%d, open querymgmt success", vgId);
  return pQueryHandle;
}

H
Haojun Liao 已提交
6500 6501 6502 6503 6504
/**
 * Callback handed to taosCacheRefresh() to kill every query still held in
 * the handle cache.
 *
 * qRegisterQInfo() stores a pointer-sized copy of the handle in the cache and
 * freeqinfoFn(), the free callback of the same cache, receives the address of
 * that stored copy. This callback is assumed to receive the same
 * representation, so the slot is dereferenced before the handle is passed to
 * qKillQuery() -- NOTE(review): confirm against taosCacheRefresh().
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** pSlot = handle;
  if (pSlot == NULL || *pSlot == NULL) {
    return;
  }

  qKillQuery(*pSlot);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

  pthread_mutex_lock(&pQueryMgmt->lock);
  pQueryMgmt->closed = true;
  pthread_mutex_unlock(&pQueryMgmt->lock);

H
Haojun Liao 已提交
6516
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538
}

/**
 * Destroy a query manager: clean up its handle cache, destroy its mutex and
 * free the object itself.
 *
 * The manager must already be marked closed (asserted).
 *
 * @param pQMgmt query manager returned by qOpenQueryMgmt(); NULL is ignored
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pManager = pQMgmt;
  if (pManager == NULL) {
    return;
  }

  assert(pManager->closed);

  // keep the id for the final log line; the object is gone by then
  int32_t ownerVgId = pManager->vgId;

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pManager->qinfoPool;
  pManager->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pManager->lock);
  tfree(pManager);

  qDebug("vgId:%d querymgmt cleanup completed", ownerVgId);
}

6539
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6540 6541 6542 6543
  if (pMgmt == NULL) {
    return NULL;
  }

6544 6545
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2;

6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  pthread_mutex_lock(&pQueryMgmt->lock);
  if (pQueryMgmt->closed) {
    pthread_mutex_unlock(&pQueryMgmt->lock);

    return NULL;
  } else {
6557 6558 6559
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
6560 6561 6562 6563 6564 6565
    pthread_mutex_unlock(&pQueryMgmt->lock);

    return handle;
  }
}

6566
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6567 6568 6569 6570 6571 6572
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6573
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

/**
 * Release a query handle previously obtained from the handle cache.
 *
 * @param pMgmt    query manager returned by qOpenQueryMgmt()
 * @param pQInfo   cache slot to release, passed through to taosCacheRelease()
 * @param needFree passed through to taosCacheRelease()
 * @return always NULL
 */
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
  SQueryMgmt *pQueryMgmt = pMgmt;

  // tolerate a missing manager, as the other query-manager entry points do
  if (pQueryMgmt == NULL || pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, needFree);
  return NULL;
}