qExecutor.c 276.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30
#include "ttype.h"
31

H
Haojun Liao 已提交
32
/* Row limit for a single result-buffer page: 2^12 - 1. */
#define MAX_ROWS_PER_RESBUF_PAGE  (4096u - 1u)
33 34 35 36 37

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
38
/* Non-zero when p and s have at least one status bit in common. */
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)

/* True when the query order maps to the forward (ascending) step. */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* Test or set the scan phase stored in the runtime environment. */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* Recover the enclosing SQInfo from a pointer to its runtimeEnv member. */
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* Position reached after moving `index` steps of size `step` from (query)->pos. */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

/* Flip n in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC; yields the new value. */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
53
/*
 * Copy the skey/ekey bounds of _src into _dst.
 *
 * The expansion deliberately ends WITHOUT a semicolon: the caller supplies it,
 * so the macro behaves as a single statement and can be used in an unbraced
 * if/else without producing a stray empty statement.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

58
/* Query execution status flags; each value is a distinct bit. */
enum {
  /* set when the query starts to execute */
  QUERY_NOT_COMPLETED = 1u << 0,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 1u << 1,

  /*
   * The query is over:
   * 1. used by single-row result queries, e.g. count/sum/first/last/avg, etc.
   * 2. also set once all data within the queried time window has been processed.
   */
  QUERY_COMPLETED = 1u << 2,

  /*
   * Set when the result has not yet been completely returned to the client;
   * usually used for interval queries with the interpolation option.
   */
  QUERY_OVER = 1u << 3,
};
78 79

/* Outcome codes for a timestamp-join comparison. */
enum {
  TS_JOIN_TS_EQUAL = 0,
  TS_JOIN_TS_NOT_EQUALS,   /* == 1 */
  TS_JOIN_TAG_NOT_EQUALS,  /* == 2 */
};

85
typedef struct {
86 87 88 89 90 91
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
92 93
} SQueryStatusInfo;

H
Haojun Liao 已提交
94
#if 0
/*
 * Disabled allocator wrappers that return NULL at random. When enabled, the
 * #defines at the bottom redirect malloc/calloc/realloc in this file to them.
 */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : malloc(__size);
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : calloc(num, __size);
}

static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();
  return (v % 5 <= 1) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
127

128
/* Clear the status bits given in st from (q)->status. */
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))

/* Number of table groups, and access to the group list at position _index. */
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

/* A query counts as an interval query when its interval length is positive. */
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
H
Haojun Liao 已提交
132

133
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
134
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
135

136 137 138
/* Returns twice the value of tsShellActivityTimer. */
static int32_t getMaximumIdleDurationSec() {
  const int timerMultiplier = 2;
  return tsShellActivityTimer * timerMultiplier;
}
139

140 141
/*
 * Advance *tw to the next time window in the scan direction of the query.
 *
 * For interval units other than 'n' and 'y' the window start moves by
 * sliding * factor and the end is start + interval - 1.
 *
 * For 'n' and 'y' the start key is reduced to seconds (divided by 1000, and by
 * a further 1000 for microsecond precision), broken down with localtime_r(),
 * moved by `interval` months (interval * 12 for 'y') and rebuilt with mktime().
 * The window end is the start of the following window minus one.
 *
 * @param pQuery  query descriptor supplying order, interval and precision
 * @param tw      in/out: current window on entry, next window on return
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before scaling: time_t and long may both be 32-bit, and seconds * 1000 would overflow
  tw->skey = (int64_t)mktime(&tm) * 1000L;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000L;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  tw->ekey -= 1;
}

177 178
/* Mark the query as finished by moving tableIndex to the number of tables. */
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
/* True once tableIndex has reached (or passed) the number of tables. */
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
179

H
hjxilinx 已提交
180
// todo move to utility
181
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
182

183
static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
184
static void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
185
static void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow);
186
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
187

188
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
189
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
190

191
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
192
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
193
static void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
194
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
195
static void buildTagQueryResult(SQInfo *pQInfo);
196

197
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
198
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
199 200
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
201
static int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order);
H
Haojun Liao 已提交
202
static void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey, int32_t type);
H
Haojun Liao 已提交
203
static STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win);
204
static STableIdInfo createTableIdInfo(SQuery* pQuery);
205

206
/*
 * Evaluate the column filters of the query against the row at elemPos.
 *
 * A row passes when, for every filtered column, at least one of that column's
 * filters accepts the value. NULL values are only accepted by isNull_filter;
 * non-NULL values are accepted by notNull_filter, never by isNull_filter, and
 * otherwise by whatever the filter callback returns.
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pVal = (char*)pColFilter->pData + pColFilter->info.bytes * elemPos;

    bool matched = false;
    for (int32_t f = 0; f < pColFilter->numOfFilters && !matched; ++f) {
      SColumnFilterElem *pElem = &pColFilter->pFilters[f];

      if (isNull(pVal, pColFilter->info.type)) {
        // a NULL value can only satisfy the is-null filter
        matched = (pElem->fp == isNull_filter);
        continue;
      }

      if (pElem->fp == notNull_filter) {
        matched = true;
        continue;
      }

      if (pElem->fp == isNull_filter) {
        continue;
      }

      if (pElem->fp(pElem, pVal, pVal)) {
        matched = true;
      }
    }

    if (!matched) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes among the output columns of the query.
 *
 * When the query has a main output, the ts, tag and tagprj functions are
 * skipped: they cannot decide the number of results, which is determined by
 * the main output.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);

  int64_t numOfRes = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    if (mainOutputExists &&
        (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pCell == NULL) {
      continue;
    }

    if (numOfRes < pCell->numOfRes) {
      numOfRes = pCell->numOfRes;
    }
  }

  assert(numOfRes >= 0);
  return numOfRes;
}

274 275 276 277 278
/*
 * The number of results has to be updated because the offset value was updated.
 * Output columns computed by ts, tag, tagprj and ts_dummy are left untouched.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[idx];
    SResultRowCellInfo *pCell = GET_RES_INFO(pFuncCtx);

    int16_t fid = pFuncCtx->functionId;
    bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ) ||
                   (fid == TSDB_FUNC_TS_DUMMY);
    if (skipped) {
      continue;
    }

    assert(pCell->numOfRes > numOfRes);
    pCell->numOfRes = numOfRes;
  }
}

H
Haojun Liao 已提交
294 295
/* Map a group index to a group id: 50000000 plus 10000 per index. */
static int32_t getMergeResultGroupId(int32_t groupIndex) {
  enum { GROUP_ID_BASE = 50000000, GROUP_ID_STRIDE = 10000 };
  return GROUP_ID_BASE + (groupIndex * GROUP_ID_STRIDE);
}

/*
 * Return true when the group-by expression contains a normal column.
 * A NULL expression or one without group columns yields false.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // make sure the normal column is located at the second position if tbname exists in the group by clause
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pCol->colIndex > 0);
    }

    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression,
 * looked up by column id in the query's column list. Returns
 * TSDB_DATA_TYPE_NULL when no matching column is found.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 stays in place when the expression has no normal column
  int32_t groupColId = -2;
  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == groupColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * Return true when the output contains at least one tag_dummy/ts_dummy column
 * and at least one function flagged TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCnt = 0;
  bool    tagsPresent = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      tagsPresent = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCnt++;
    }
  }

  return (selectivityCnt > 0 && tagsPresent);
}

366 367
/* Return true when every output column is produced by TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ. */
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    bool isProjection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);
    if (!isProjection) {
      return false;
    }
    ++idx;
  }

  return true;
}

H
Haojun Liao 已提交
377
/* Return true when the first output expression is TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
378

379 380 381
/*
 * Enforce the LIMIT of the query on the rows of the current result set.
 *
 * When a positive limit is set and total + rows exceeds it, rec.rows is cut
 * to the remaining quota, the query is marked QUERY_COMPLETED and true is
 * returned. Otherwise nothing changes and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit <= 0 || pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  // keep only as many rows as the limit still allows
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* Return true when any output column is computed by TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    ++idx;

    // timestamp columns never decide the answer
    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

411 412 413 414 415 416 417 418 419 420 421
/* Return true when any output column is computed by TSDB_FUNC_TWA. */
static bool timeWindowInterpoRequired(SQuery *pQuery) {
  bool required = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput && !required; ++idx) {
    required = (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_TWA);
  }

  return required;
}

H
Haojun Liao 已提交
422
/*
 * Return true when the query is a single-output ts_comp query, or when any
 * output expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // set tag value, by which the results are aggregated
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SExprInfo *pExpr = &pQuery->pExpr1[i];

    // the ts_comp column requires the tag value for the join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

440 441 442 443 444 445 446 447
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
448
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
449
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
450 451
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
452 453
  } else {
    *pColStatis = NULL;
454
  }
455

H
Haojun Liao 已提交
456
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
457 458 459
    return false;
  }

460 461 462
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
463

464 465 466
  return true;
}

H
Haojun Liao 已提交
467
/*
 * Look up the result row that belongs to the key (pData, bytes, uid), creating
 * it when it does not exist yet.
 *
 * The key is assembled in pRuntimeEnv->keyBuf and looked up in
 * pRuntimeEnv->pResultRowHashTable, whose values are indices into
 * pWindowResInfo->pResult. On a miss a new row is taken from the pool,
 * appended to pWindowResInfo (growing the pointer array when it is full) and
 * registered in the hash table.
 *
 * @param masterscan  new rows are only created when this is true
 * @return the result row, or NULL when the key is unknown and masterscan is false
 *
 * Does not return normally on failure: longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY when realloc or initResultRow fails, and with
 * TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when size exceeds MAX_INTERVAL_TIME_WINDOW.
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);
  int32_t *p1 =
      (int32_t *)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // TODO refactor
    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCapacity = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // for a capacity of 0 or 1 the scaled value truncates back to the old capacity;
      // always grow by at least one slot so the store below stays inside the array
      if (newCapacity <= pWindowResInfo->capacity) {
        newCapacity = (int64_t)pWindowResInfo->capacity + 1;
      }

      // keep the old pointer until realloc has succeeded
      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SResultRow **)t;

      // clear the newly added slots
      int32_t inc = (int32_t)newCapacity - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, POINTER_BYTES * inc);

      pWindowResInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = getNewResultRow(pRuntimeEnv->pool);
    pWindowResInfo->pResult[pWindowResInfo->size] = pResult;
    int32_t ret = initResultRow(pResult);
    if (ret != TSDB_CODE_SUCCESS) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes),
                (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pWindowResInfo, pWindowResInfo->curIndex);
}

// get the correct time window according to the handled timestamp
H
Haojun Liao 已提交
524
static STimeWindow getActiveTimeWindow(SResultRowInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
525
  STimeWindow w = {0};
526

527
 if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
528
    w.skey = pWindowResInfo->prevSKey;
529 530
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision);
531
    } else {
532
      w.ekey = w.skey + pQuery->interval.interval - 1;
533
    }
534
  } else {
535
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
536
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
537
    w = pWindowRes->win;
538
  }
539

540
  if (w.skey > ts || w.ekey < ts) {
541 542 543
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
544 545
    } else {
      int64_t st = w.skey;
546

547
      if (st > ts) {
548
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
549
      }
550

551
      int64_t et = st + pQuery->interval.interval - 1;
552
      if (et < ts) {
553
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
554
      }
555

556
      w.skey = st;
557
      w.ekey = w.skey + pQuery->interval.interval - 1;
558
    }
559
  }
560

561 562 563 564 565 566 567
  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }
568

569 570 571
  return w;
}

H
Haojun Liao 已提交
572
/*
 * Assign a slot in the disk-based result buffer to a result row that has none.
 *
 * The row goes into the last page listed for tid when that page holds fewer
 * than numOfRowsPerPage rows; otherwise (or when tid has no page yet) a new
 * page is requested.
 *
 * @return 0 on success or when the row already has a page, -1 when no page
 *         could be obtained
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  // a page has already been assigned to this result row
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  // in the first scan, new space is needed for results
  int32_t    newPageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pages = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(pages) != 0) {
    SPageInfo* pLast = getLastPageInfo(pages);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    newPageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release the current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, tid, &newPageId);
      if (pPage != NULL) {
        assert(pPage->num == 0);  // number of elements must be 0 for a newly allocated buffer
      }
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, tid, &newPageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // pageId is still -1 here (checked on entry): take the next row slot of the page
  pWindowRes->pageId = newPageId;
  pWindowRes->rowId = (int32_t)(pPage->num++);
  assert(pWindowRes->pageId >= 0);

  return 0;
}

H
Haojun Liao 已提交
617
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pWindowResInfo, SDataBlockInfo* pBockInfo,
618
                                       STimeWindow *win, bool masterscan, bool* newWind, SResultRow** pResult) {
619 620
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
621

H
Haojun Liao 已提交
622 623 624
  // todo refactor
  int64_t uid = getResultInfoUId(pRuntimeEnv);
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, uid);
H
Haojun Liao 已提交
625
  if (pResultRow == NULL) {
626 627
    *newWind = false;

628
    // no master scan, no result generated means error occurs
629
    return masterscan? -1:0;
630
  }
631

632
  *newWind = true;
H
Haojun Liao 已提交
633

634
  // not assign result buffer yet, add new result buffer
H
Haojun Liao 已提交
635 636
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, pBockInfo->tid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
637
    if (ret != TSDB_CODE_SUCCESS) {
638 639 640
      return -1;
    }
  }
641

642
  // set time window for current result
H
Haojun Liao 已提交
643
  pResultRow->win = (*win);
644
  *pResult = pResultRow;
H
Haojun Liao 已提交
645
  setResultRowOutputBufInitCtx(pRuntimeEnv, pResultRow);
646

647 648 649
  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
650
/* Return the `closed` flag of the result row stored at index slot. */
static bool getResultRowStatus(SResultRowInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SResultRow *pRow = pWindowResInfo->pResult[slot];
  return pRow->closed;
}

655 656 657 658 659 660 661 662 663 664 665 666 667 668
/* Selects which interpolation flag of a result row is addressed: start or end. */
typedef enum SResultTsInterpType {
  RESULT_ROW_START_INTERP = 1,
  RESULT_ROW_END_INTERP,  /* == 2 */
} SResultTsInterpType;

/* Set the startInterp or endInterp flag of the result row, as selected by type. */
static void setResultRowInterpo(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  if (type != RESULT_ROW_START_INTERP) {
    pResult->endInterp = true;
    return;
  }

  pResult->startInterp = true;
}

H
Haojun Liao 已提交
669
/* Return the startInterp or endInterp flag of the result row, as selected by type. */
static bool resultRowInterpolated(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  return (type == RESULT_ROW_START_INTERP) ? (pResult->startInterp == true) : (pResult->endInterp == true);
}

H
Haojun Liao 已提交
678
/*
 * Count the rows between position pos and the row located for ekey by
 * searchFn, moving in the direction given by order.
 *
 * Ascending: the search covers pData[pos .. numOfRows-1], and the returned
 * index is relative to pos. Descending: the search covers pData[0 .. pos].
 * The row found is included in the count when its timestamp equals ekey.
 * The result is asserted to be positive.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  bool    asc = (order == TSDB_ORDER_ASC);
  int32_t idx = asc ? searchFn((char*) &pData[pos], numOfRows - pos, ekey, order)
                    : searchFn((char *)pData, pos + 1, ekey, order);

  int32_t steps = 0;
  if (idx >= 0) {
    // absolute position of the located row inside pData
    int32_t hit = asc ? (idx + pos) : idx;

    steps = asc ? idx : (pos - idx);
    if (pData[hit] == ekey) {
      steps += 1;
    }
  }

  assert(steps > 0);
  return steps;
}

H
Haojun Liao 已提交
706
/*
 * Close every leading result window that lastKey has passed and move curIndex
 * to the first window that stays open.
 *
 * Windows are visited in order. A window that is still open is closed when
 * its ekey <= lastKey (ascending) or its skey >= lastKey (descending); the
 * first window that fails this test ends the walk and becomes the current one
 * (prevSKey is set to its skey). If the walk reaches the end, curIndex is set
 * to the last window.
 *
 * @return the number of windows that were already closed when visited;
 *         windows closed by this call are not counted
 */
static int32_t updateResultRowCurrentIndex(SResultRowInfo* pWindowResInfo, TSKEY lastKey, bool ascQuery) {
  int64_t openSKey = TSKEY_INITIAL_VAL;
  int32_t closedBefore = 0;
  int32_t idx = 0;

  while (idx < pWindowResInfo->size) {
    SResultRow *pRow = pWindowResInfo->pResult[idx];

    if (pRow->closed) {
      closedBefore += 1;
      ++idx;
      continue;
    }

    bool passed = ascQuery ? (pRow->win.ekey <= lastKey) : (pRow->win.skey >= lastKey);
    if (!passed) {
      openSKey = pRow->win.skey;
      break;
    }

    closeTimeWindow(pWindowResInfo, idx);
    ++idx;
  }

  if (openSKey != TSKEY_INITIAL_VAL) {
    pWindowResInfo->curIndex = idx;
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex]->win.skey;
  } else {
    // all windows are closed, make the last one current
    assert(idx == pWindowResInfo->size);
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
  }

  return closedBefore;
}

739 740 741
/**
 * NOTE: the query status only set for the first scan of master scan.
 */
H
Haojun Liao 已提交
742
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SResultRowInfo *pWindowResInfo) {
743
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
744
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || pWindowResInfo->size == 0) {
745
    return pWindowResInfo->size;
746
  }
747

748
  // no qualified results exist, abort check
749
  int32_t numOfClosed = 0;
H
Haojun Liao 已提交
750
  bool ascQuery = QUERY_IS_ASC_QUERY(pQuery);
751

752
  // query completed
H
Haojun Liao 已提交
753
  if ((lastKey >= pQuery->current->win.ekey && ascQuery) || (lastKey <= pQuery->current->win.ekey && (!ascQuery))) {
754
    closeAllTimeWindow(pWindowResInfo);
755

756 757 758
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
H
Haojun Liao 已提交
759
    numOfClosed = updateResultRowCurrentIndex(pWindowResInfo, lastKey, ascQuery);
760

761
    // the number of completed slots are larger than the threshold, return current generated results to client.
H
Haojun Liao 已提交
762
    if (numOfClosed > pQuery->rec.threshold) {
763
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
764
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
765

766
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
767
    } else {
768
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
769
             numOfClosed);
770 771
    }
  }
772

773 774 775 776 777
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
778

779
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
780
  return numOfClosed;
781 782 783
}

/*
 * Count the rows of the data block, starting at startPos and moving in query
 * direction, that belong to the time window ending at ekey.
 *
 * When the window ends inside the block the count comes from
 * getForwardStepsInBlock; otherwise all remaining rows in scan direction are
 * taken. With updateLastKey set, the lastKey of the current table is moved
 * one step past the last row counted (or past the block border when the
 * whole remainder is taken).
 *
 * @return the number of rows, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);
  int32_t rows  = -1;

  STableQueryInfo* pTableInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    bool endsInBlock = (ekey < pDataBlockInfo->window.ekey);
    if (endsInBlock) {
      rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    } else {
      rows = pDataBlockInfo->rows - startPos;
    }

    if (updateLastKey) {  // update the last key
      TSKEY last = endsInBlock ? pPrimaryColumn[startPos + (rows - 1)] : pDataBlockInfo->window.ekey;
      pTableInfo->lastKey = last + step;
    }
  } else {  // desc
    bool endsInBlock = (ekey > pDataBlockInfo->window.skey);
    if (endsInBlock) {
      rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    } else {
      rows = startPos + 1;
    }

    if (updateLastKey) {  // update the last key
      TSKEY last = endsInBlock ? pPrimaryColumn[startPos - (rows - 1)] : pDataBlockInfo->window.skey;
      pTableInfo->lastKey = last + step;
    }
  }

  assert(rows > 0);
  return rows;
}

H
Haojun Liao 已提交
823 824
/**
 * Run the block-wise implementation (xFunction) of every output expression over
 * a run of rows of the current data block.
 *
 * Nothing is executed unless this is the master scan or the window is flagged closed.
 *
 * @param pRuntimeEnv  runtime environment holding the query and the function contexts
 * @param closed       status flag of the result row the functions accumulate into
 * @param pWin         time window being processed; its skey becomes nStartQueryTimestamp
 * @param offset       index of the first row (in query order) of the run
 * @param forwardStep  number of rows in the run
 * @param tsCol        timestamp column of the data block
 * @param numOfTotal   total number of rows in the data block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCur = &pCtx[k];

    pCur->nStartQueryTimestamp = pWin->skey;
    pCur->size = forwardStep;
    // the functions always receive the lowest row index of the run
    pCur->startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

    int32_t functionId = pQuery->pExpr1[k].base.functionId;
    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCur->ptsList = &tsCol[pCur->startOffset];
    }

    // Remember the flag of THIS context. The previous code sampled pCtx[0] once and wrote that
    // value back into every context, clobbering the flag of contexts that differed from it.
    bool hasPrev = pCur->preAggVals.isSet;

    // not a whole block involved in query processing, statistics data can not be used
    if (hasPrev && forwardStep < numOfTotal) {
      pCur->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pCur, functionId)) {
      aAggs[functionId].xFunction(pCur);
    }

    // restore the flag for the next window that is processed on the same data block
    pCur->preAggVals.isSet = hasPrev;
  }
}

857
/**
 * Run the row-wise implementation (xFunctionF) of every output expression on a single row.
 *
 * Nothing is executed unless this is the master scan or the window is flagged closed.
 *
 * @param pRuntimeEnv  runtime environment holding the query and the function contexts
 * @param closed       status flag of the result row the functions accumulate into
 * @param pWin         time window being processed; its skey becomes nStartQueryTimestamp
 * @param offset       row index handed to each xFunctionF
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!(IS_MASTER_SCAN(pRuntimeEnv) || closed)) {
    return;
  }

  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t k = 0;
  while (k < pQuery->numOfOutput) {
    SQLFunctionCtx *pCur = &pRuntimeEnv->pCtx[k];
    pCur->nStartQueryTimestamp = pWin->skey;

    int32_t functionId = pQuery->pExpr1[k].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pCur, functionId)) {
      aAggs[functionId].xFunctionF(pCur, offset);
    }

    ++k;
  }
}

H
Haojun Liao 已提交
873 874
/**
 * Advance pNext to the following time window and locate its first row in the current block.
 *
 * @param pRuntimeEnv     runtime environment holding the query
 * @param pNext           in: current window, out: the next window that may contain data
 * @param pDataBlockInfo  row count and [skey, ekey] range of the data block
 * @param primaryKeys     timestamp column of the block, may be NULL
 * @param searchFn        search routine used to find the start row by key
 * @param prevPosition    index of the last row of the previous window, or -1 if unknown
 * @return                index of the first row of the next window, or -1 when the next
 *                        window starts beyond the current block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  getNextTimeWindow(pQuery, pNext);

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  if (asc ? (pNext->skey > pDataBlockInfo->window.ekey) : (pNext->ekey < pDataBlockInfo->window.skey)) {
    return -1;
  }

  // the key to look for is the leading edge of the window, limited by the start key of the query
  TSKEY startKey = asc ? pNext->skey : pNext->ekey;
  if (asc ? (startKey < pQuery->window.skey) : (startKey > pQuery->window.skey)) {
    startKey = pQuery->window.skey;
  }

  int32_t startPos = 0;
  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling time window query, a special case of sliding time window query:
    // the next window starts right after the last row of the previous one
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (asc && startKey <= pDataBlockInfo->window.skey) {
    startPos = 0;
  } else if (!asc && startKey >= pDataBlockInfo->window.ekey) {
    startPos = pDataBlockInfo->rows - 1;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  // without a timestamp column the window can only be checked against the block range
  if (primaryKeys == NULL) {
    if (asc) {
      assert(pDataBlockInfo->window.skey <= pNext->ekey);
    } else {
      assert(pDataBlockInfo->window.ekey >= pNext->skey);
    }

    return startPos;
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  const TSKEY firstTs = primaryKeys[startPos];
  const bool  noDataInWindow = asc ? (firstTs > pNext->ekey) : (firstTs < pNext->skey);
  if (!noDataInWindow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    // intervals with unit 'n' or 'y' are re-aligned through the time helpers
    pNext->skey = taosTimeTruncate(firstTs, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (asc) {
    // jump forward by a whole number of sliding steps so that the window reaches firstTs
    pNext->ekey += ((firstTs - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    // jump backward by a whole number of sliding steps so that the window reaches firstTs
    pNext->skey -= ((pNext->skey - firstTs + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
949
/**
 * Return the trailing key (in query order) of a time window, limited by the end key of the query.
 *
 * Ascending:  min(pWindow->ekey, pQuery->window.ekey)
 * Descending: max(pWindow->skey, pQuery->window.ekey)
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
966 967
/**
 * Linear lookup of a column's data buffer inside a data block by column id.
 * todo binary search
 *
 * @param pDataBlock  array of SColumnInfoData
 * @param colId       column id to look for
 * @return            pData of the first column whose id matches, NULL if there is none
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  const int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
981
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
982 983 984
  if (pDataBlock == NULL) {
    return NULL;
  }
985

H
Haojun Liao 已提交
986
  char *dataBlock = NULL;
H
Haojun Liao 已提交
987
  SQuery *pQuery = pRuntimeEnv->pQuery;
988

H
Haojun Liao 已提交
989
  int32_t functionId = pQuery->pExpr1[col].base.functionId;
990
  if (functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
991
    sas->pArithExpr = &pQuery->pExpr1[col];
992

H
Haojun Liao 已提交
993
    sas->offset    = (QUERY_IS_ASC_QUERY(pQuery))? pQuery->pos : pQuery->pos - (size - 1);
H
Haojun Liao 已提交
994
    sas->colList   = pQuery->colList;
995
    sas->numOfCols = pQuery->numOfCols;
H
Haojun Liao 已提交
996
    sas->data      = calloc(pQuery->numOfCols, POINTER_BYTES);
997

H
Haojun Liao 已提交
998 999 1000 1001
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

1002
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
1003
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
1004
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
1005
      SColumnInfo *pColMsg = &pQuery->colList[i];
1006

1007 1008 1009 1010 1011 1012 1013 1014
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
1015

1016
      assert(dataBlock != NULL);
1017
      sas->data[i] = dataBlock;  // start from the offset
1018
    }
1019

1020
  } else {  // other type of query function
H
Haojun Liao 已提交
1021
    SColIndex *pCol = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
1022
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
1023
      SColIndex* pColIndex = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
1024 1025 1026 1027
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
1028 1029
    } else {
      dataBlock = NULL;
1030 1031
    }
  }
1032

1033 1034 1035
  return dataBlock;
}

H
Haojun Liao 已提交
1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
/**
 * Mark one border of the window as "not interpolated" in every function context by
 * setting its key to INT64_MIN.
 *
 * @param pCtx         array of function contexts
 * @param numOfOutput  number of entries in pCtx
 * @param type         RESULT_ROW_START_INTERP resets start.key, any other value resets end.key
 */
static void setNotInterpoWindowKey(SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t type) {
  const bool resetStart = (type == RESULT_ROW_START_INTERP);

  for (int32_t i = 0; i < numOfOutput; ++i) {
    if (resetStart) {
      pCtx[i].start.key = INT64_MIN;
    } else {
      pCtx[i].end.key = INT64_MIN;
    }
  }
}

1048
// window start key interpolation
H
Haojun Liao 已提交
1049
/**
 * Prepare the interpolated value at the leading border of a time window.
 *
 * @param pRuntimeEnv  runtime environment; prevRow[0] holds the timestamp saved from the previous block
 * @param pos          index of the first row of the window inside the block
 * @param numOfRows    number of rows in the block
 * @param pDataBlock   column data of the block
 * @param tsCols       timestamp column of the block
 * @param win          the time window
 * @return             always true: the start border is settled, either by interpolation or
 *                     because no interpolation is required
 */
static bool setTimeWindowInterpolationStartTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t pos, int32_t numOfRows, SArray* pDataBlock, TSKEY* tsCols, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  bool    asc = QUERY_IS_ASC_QUERY(pQuery);

  TSKEY curTs  = tsCols[pos];
  TSKEY lastTs = *(TSKEY *) pRuntimeEnv->prevRow[0];

  // start exactly from this point, no need to do interpolation
  TSKEY key = asc? win->skey:win->ekey;
  if (key == curTs) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    return true;
  }

  // true when `pos` is the first row of the block in scan order, i.e. there is no previous row in it
  bool firstRowInBlock = asc? (pos == 0):(pos == (numOfRows - 1));

  // lastTs == INT64_MIN on the first row means this is the first time window, interpolation is not needed.
  if (lastTs == INT64_MIN && firstRowInBlock) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    return true;
  }

  TSKEY   prevTs;
  int32_t prevRowIndex;
  if (firstRowInBlock) {
    // The previous point lives in prevRow. The callee selects prevRow only for index -1; the
    // former `pos - step` evaluated to numOfRows on a descending scan and made it read past the block.
    prevTs = lastTs;
    prevRowIndex = -1;
  } else {
    prevRowIndex = pos - GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    prevTs = tsCols[prevRowIndex];
  }

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, curTs, pos, key, RESULT_ROW_START_INTERP);
  return true;
}
1075

H
Haojun Liao 已提交
1076 1077 1078
/**
 * Prepare the interpolated value at the trailing border of a time window.
 *
 * @param pRuntimeEnv  runtime environment holding the query and the function contexts
 * @param endRowIndex  index of the last row of the window inside the block
 * @param pDataBlock   column data of the block
 * @param tsCols       timestamp column of the block
 * @param blockEkey    last key of the block in scan order
 * @param win          the time window
 * @return             false when the window continues beyond this block (nothing settled yet),
 *                     true when the end border is settled
 */
static bool setTimeWindowInterpolationEndTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t endRowIndex, SArray* pDataBlock, TSKEY* tsCols, TSKEY blockEkey, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  TSKEY   lastRowTs = tsCols[endRowIndex];

  const bool  asc = QUERY_IS_ASC_QUERY(pQuery);
  const TSKEY borderKey = asc? win->ekey:win->skey;

  // not ended in current data block, do not invoke interpolation
  const bool continuesInNextBlock = asc? (borderKey > blockEkey):(borderKey < blockEkey);
  if (continuesInNextBlock) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
    return false;
  }

  // there is actual end point of current time window, no interpolation need
  if (borderKey == lastRowTs) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
    return true;
  }

  // interpolate between the last row of the window and the row that follows it in scan order
  int32_t followingRow = endRowIndex + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  assert(followingRow >= 0);

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, lastRowTs, endRowIndex, tsCols[followingRow], followingRow,
                                   borderKey, RESULT_ROW_END_INTERP);
  return true;
}

/**
 * Copy the last row (in scan order) of every query column of the block into
 * pRuntimeEnv->prevRow, indexed by the position of the column inside the block.
 *
 * @param pRuntimeEnv     runtime environment owning the prevRow buffers
 * @param pDataBlockInfo  supplies the number of rows of the block
 * @param pDataBlock      column data of the block; nothing is done when it is NULL
 */
static void saveDataBlockLastRow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray* pDataBlock) {
  if (pDataBlock == NULL) {
    return;
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;

  // ascending scans finish on the last row of the block, descending scans on row 0
  int32_t lastRow = 0;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    lastRow = pDataBlockInfo->rows - 1;
  }

  for (int32_t col = 0; col < pQuery->numOfCols; ++col) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, col);
    char *pRow = ((char*)pColData->pData) + (pColData->info.bytes * lastRow);

    memcpy(pRuntimeEnv->prevRow[col], pRow, pColData->info.bytes);
  }
}

/**
 * Timestamp at which processing of the current block starts.
 *
 * With a timestamp column this is the timestamp of the row at the current query position;
 * without one it is the leading key (in scan order) of the block range.
 */
static TSKEY getStartTsKey(SQuery* pQuery, SDataBlockInfo* pDataBlockInfo, TSKEY* tsCols, int32_t step) {
  if (tsCols != NULL) {
    int32_t rowIndex = GET_COL_DATA_POS(pQuery, 0, step);
    return tsCols[rowIndex];
  }

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return pDataBlockInfo->window.skey;
  }

  return pDataBlockInfo->window.ekey;
}

1129
/**
H
Haojun Liao 已提交
1130
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
1131 1132
 * @param pRuntimeEnv
 * @param forwardStep
1133
 * @param tsCols
1134 1135 1136 1137 1138
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
H
Haojun Liao 已提交
1139
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
H
Haojun Liao 已提交
1140
                                    SResultRowInfo *pWindowResInfo, __block_search_fn_t searchFn, SArray *pDataBlock) {
1141
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1142
  bool            masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1143

1144 1145
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
1146
  if (pDataBlock != NULL) {
1147
    SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, 0);
1148
    tsCols = (TSKEY *)(pColInfo->pData);
1149
  }
1150

1151
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1152 1153 1154
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1155

1156
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1157
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1158
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1159
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1160
  }
1161

1162
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1163
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1164 1165
    int32_t prevIndex = curTimeWindowIndex(pWindowResInfo);

1166
    TSKEY ts = getStartTsKey(pQuery, pDataBlockInfo, tsCols, step);
H
Haojun Liao 已提交
1167
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1168

1169 1170 1171 1172
    bool hasTimeWindow  = false;
    SResultRow* pResult = NULL;
    int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult);
    if (ret != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
1173
      tfree(sasArray);
H
hjxilinx 已提交
1174
      return;
1175
    }
1176

H
Haojun Liao 已提交
1177 1178 1179
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

H
Haojun Liao 已提交
1180
    // in case of repeat scan/reverse scan, no new time window added.
1181
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1182
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1183
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1184

H
Haojun Liao 已提交
1185 1186
      // prev time window not interpolation yet.
      int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
H
Haojun Liao 已提交
1187
      if (prevIndex != -1 && prevIndex < curIndex && pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208
        for(int32_t j = prevIndex; j < curIndex; ++j) {
          SResultRow *pRes = pWindowResInfo->pResult[j];

          STimeWindow w = pRes->win;
          ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &w, masterScan, &hasTimeWindow, &pResult);
          assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));

          int32_t p = QUERY_IS_ASC_QUERY(pQuery)? 0:pDataBlockInfo->rows-1;
          doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, *(TSKEY*) pRuntimeEnv->prevRow[0], -1,  tsCols[0], p, w.ekey, RESULT_ROW_END_INTERP);
          setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);

          bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doBlockwiseApplyFunctions(pRuntimeEnv, closed, &w, startPos, 0, tsCols, pDataBlockInfo->rows);
        }

        // restore current time window
        ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult);
        assert (ret == TSDB_CODE_SUCCESS);  // null data, too many state code
      }

1209 1210
      // window start key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1211 1212 1213 1214
        bool done = resultRowInterpolated(pResult, RESULT_ROW_START_INTERP);
        if (!done) {
          int32_t startRowIndex = pQuery->pos;
          bool    interp = setTimeWindowInterpolationStartTs(pRuntimeEnv, startRowIndex, pDataBlockInfo->rows, pDataBlock, tsCols, &win);
1215 1216 1217
          if (interp) {
            setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
          }
H
Haojun Liao 已提交
1218 1219
        } else {
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
1220 1221
        }

H
Haojun Liao 已提交
1222 1223 1224 1225 1226 1227
        done = resultRowInterpolated(pResult, RESULT_ROW_END_INTERP);
        if (!done) {
          int32_t endRowIndex = pQuery->pos + (forwardStep - 1) * step;

          TSKEY endKey = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
          bool  interp = setTimeWindowInterpolationEndTs(pRuntimeEnv, endRowIndex, pDataBlock, tsCols, endKey, &win);
1228 1229 1230
          if (interp) {
            setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
          }
H
Haojun Liao 已提交
1231 1232
        } else {
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
1233 1234 1235 1236
        }
      }

      bool pStatus = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1237
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1238
    }
1239

1240 1241
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1242

1243
    while (1) {
H
Haojun Liao 已提交
1244 1245
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1246 1247 1248
      if (startPos < 0) {
        break;
      }
1249

1250
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1251
      hasTimeWindow = false;
1252 1253
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow, &pResult) !=
          TSDB_CODE_SUCCESS) {
1254 1255
        break;
      }
1256

1257 1258 1259 1260 1261
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1262
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1263

1264 1265
      // window start(end) key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1266 1267 1268 1269
        bool done = resultRowInterpolated(pResult, RESULT_ROW_START_INTERP);
        if (!done) {
          int32_t startRowIndex = startPos;
          bool    interp = setTimeWindowInterpolationStartTs(pRuntimeEnv, startRowIndex, pDataBlockInfo->rows, pDataBlock, tsCols, &nextWin);
1270 1271 1272
          if (interp) {
            setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
          }
H
Haojun Liao 已提交
1273 1274
        } else {
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
1275 1276
        }

H
Haojun Liao 已提交
1277 1278 1279 1280 1281
        done = resultRowInterpolated(pResult, RESULT_ROW_END_INTERP);
        if (!done) {
          int32_t endRowIndex = startPos + (forwardStep - 1)*step;
          TSKEY endKey = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
          bool  interp = setTimeWindowInterpolationEndTs(pRuntimeEnv, endRowIndex, pDataBlock, tsCols, endKey, &nextWin);
1282 1283 1284
          if (interp) {
            setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
          }
H
Haojun Liao 已提交
1285 1286
        } else {
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
1287 1288 1289 1290
        }
      }

      bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
1291
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1292
    }
1293

1294 1295 1296 1297 1298 1299 1300
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1301
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1302
      int32_t functionId = pQuery->pExpr1[k].base.functionId;
1303
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
1304
        pCtx[k].nStartQueryTimestamp = pDataBlockInfo->window.skey;
1305 1306 1307 1308
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1309

1310 1311 1312 1313 1314
  if (pRuntimeEnv->timeWindowInterpo) {
    saveDataBlockLastRow(pRuntimeEnv, pDataBlockInfo, pDataBlock);
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1315
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1316 1317
      continue;
    }
1318

S
TD-1848  
Shengliang Guan 已提交
1319
    tfree(sasArray[i].data);
1320
  }
1321

S
TD-1848  
Shengliang Guan 已提交
1322
  tfree(sasArray);
1323 1324
}

1325
/**
 * Select (creating it on first use) the result row of the group identified by a group-by
 * value and make it the current output buffer of all function contexts.
 *
 * Jumps to pRuntimeEnv->env with TSDB_CODE_QRY_APP_ERROR when the group-by column is
 * float/double, and with TSDB_CODE_QRY_OUT_OF_MEMORY when the copy of a binary/nchar
 * group key cannot be allocated.
 *
 * @param pRuntimeEnv  runtime environment (result buffer, window result info, jump buffer)
 * @param pData        pointer to the group-by value
 * @param type         data type of the group-by column
 * @param bytes        width of the group-by column
 * @param groupIndex   group index, used as the uid of the result row key
 * @return             TSDB_CODE_SUCCESS, or -1 when the value is NULL or no result row /
 *                     result buffer page could be obtained
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // not assign result buffer yet, add new result buffer, TODO remove it
  char* d = pData;
  int16_t len = bytes;
  if (isVarType) {
    // the hash key of a variable length value is its payload without the length header
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  uint64_t uid = groupIndex;
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, uid);
  if (pResultRow == NULL) {
    return -1;
  }

  if (isVarType) {
    if (pResultRow->key == NULL) {
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {
        // the copy below would write through a NULL pointer
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pResultRow->key, pData);
    } else {
      assert(memcmp(pResultRow->key, pData, varDataTLen(pData)) == 0);
    }
  } else {
    // integral group keys are stored in the window keys of the result row
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
      default:                      break;  // any other type keeps -1, as before
    }

    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1386
/**
 * Find the data of the first non-tag group-by column that is present in the data block.
 *
 * @param pQuery      query descriptor holding the group-by expression and the column list
 * @param type        out: data type of the last group-by column examined
 * @param bytes       out: width of the last group-by column examined
 * @param pDataBlock  array of SColumnInfoData
 * @return            column data, or NULL when no non-tag group-by column is found in the block
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);

    if (!TSDB_COL_IS_TAG(pGroupCol->flag)) {
      // position of the group-by column in the column list of the query
      int16_t colIndex = -1;
      int32_t colId = pGroupCol->colId;

      for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
        if (colId == pQuery->colList[c].colId) {
          colIndex = c;
          break;
        }
      }

      assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

      *type = pQuery->colList[colIndex].type;
      *bytes = pQuery->colList[colIndex].bytes;

      /*
       *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
       * stage, the remain tables may not have the required column in cache actually. So, the validation of required
       * column in cache with the corresponding schema is reinforced.
       */
      int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

      for (int32_t b = 0; b < numOfBlockCols; ++b) {
        SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
        if (pColData->info.colId == pGroupCol->colId) {
          return pColData->pData;
        }
      }
    }
  }

  return NULL;
}

/**
 * Compare the row at `offset` with the current element of the timestamp buffer of a join.
 *
 * @param pRuntimeEnv  runtime environment holding the ts buffer and the function contexts
 * @param offset       row index inside the input buffer of the first function context
 * @return             TS_JOIN_TAG_NOT_EQUALS when the tags differ,
 *                     TS_JOIN_TS_NOT_EQUALS when the row is behind the buffer element in
 *                     scan order, TS_JOIN_TS_EQUAL otherwise (a row ahead of the buffer
 *                     element is asserted to never happen)
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

  // NOTE(review): elem.tag is handed to tVariantCompare like a pointer above while it is
  // accessed with '.' below - confirm that this debug-only code still compiles.
#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // the row has not reached the buffer element yet (seen in scan order)
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row is already past the buffer element
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether a function has to be invoked for the current scan.
 *
 * @param pRuntimeEnv  runtime environment (query and scan flag)
 * @param pCtx         context of the function
 * @param functionId   id of the function
 * @return             true when the function must be executed
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // completed results and the dummy functions never run
  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (pResInfo->complete || isDummy) {
    return false;
  }

  // first/first_dst only run when the query is ascending
  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // denote the order type: last/last_dst run when their first parameter equals the query order
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, only the functions handled above need to be executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

H
Haojun Liao 已提交
1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531
/**
 * Compute, for every TWA output expression, the linearly interpolated value at a window
 * border and store it in the start or end point of the function context.
 *
 * Contexts of all other functions get start.key reset to INT64_MIN.
 *
 * @param pRuntimeEnv   runtime environment (query, function contexts, prevRow buffers)
 * @param pDataBlock    column data of the current block
 * @param prevTs        timestamp of the point before the border
 * @param prevRowIndex  row index of that point, or -1 when it is the row saved in prevRow
 * @param curTs         timestamp of the point after the border, must differ from windowKey
 * @param curRowIndex   row index of that point inside the block
 * @param windowKey     timestamp of the border to interpolate at
 * @param type          RESULT_ROW_START_INTERP fills the start point, otherwise the end point
 */
void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey,  int32_t type) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t functionId = pQuery->pExpr1[k].base.functionId;
    if (functionId != TSDB_FUNC_TWA) {
      pRuntimeEnv->pCtx[k].start.key = INT64_MIN;
      continue;
    }

    SColIndex* pColIndex = &pQuery->pExpr1[k].base.colInfo;
    int16_t index = pColIndex->colIndex;
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, index);

    assert(pColInfo->info.colId == pColIndex->colId && curTs != windowKey);
    double v1 = 0, v2 = 0, v = 0;

    if (prevRowIndex == -1) {
      // prevRow is filled per data block column (see saveDataBlockLastRow), so it has to be
      // addressed with the column index of the expression, not with the expression index k
      GET_TYPED_DATA(v1, double, pColInfo->info.type, (char *)pRuntimeEnv->prevRow[index]);
    } else {
      GET_TYPED_DATA(v1, double, pColInfo->info.type, (char *)pColInfo->pData + prevRowIndex * pColInfo->info.bytes);
    }

    GET_TYPED_DATA(v2, double, pColInfo->info.type, (char *)pColInfo->pData + curRowIndex * pColInfo->info.bytes);

    SPoint point1 = (SPoint){.key = prevTs, .val = &v1};
    SPoint point2 = (SPoint){.key = curTs, .val = &v2};
    SPoint point  = (SPoint){.key = windowKey, .val = &v};
    taosGetLinearInterpolationVal(TSDB_DATA_TYPE_DOUBLE, &point1, &point2, &point);

    if (type == RESULT_ROW_START_INTERP) {
      pRuntimeEnv->pCtx[k].start.key = point.key;
      pRuntimeEnv->pCtx[k].start.val = v;
    } else {
      pRuntimeEnv->pCtx[k].end.key = point.key;
      pRuntimeEnv->pCtx[k].end.val = v;
    }
  }
}

H
Haojun Liao 已提交
1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568
/*
 * Prepare the start-key interpolation state of the function contexts for the
 * time window `win` before the row at `offset` (timestamp `ts`) is applied.
 *
 * If pResult is already flagged RESULT_ROW_START_INTERP, the start key of all
 * contexts is marked as "not interpolated" and nothing else happens.
 * Otherwise:
 *   - the window start (skey for ascending, ekey for descending order) equal
 *     to ts only flags the result row;
 *   - a valid previous timestamp lying before the window start (in scan
 *     direction) triggers a row-wise interpolation and flags the result row;
 *   - in any other case the start key is marked as "not interpolated".
 * In this path the end key is always marked as "not interpolated" and the
 * size of every context is set to 1.
 */
static void setTimeWindowSKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx* pCtxList = pRuntimeEnv->pCtx;

  if (resultRowInterpolated(pResult, RESULT_ROW_START_INTERP)) {
    setNotInterpoWindowKey(pCtxList, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    return;
  }

  bool  ascending = QUERY_IS_ASC_QUERY(pQuery);
  TSKEY startKey = ascending ? win->skey : win->ekey;

  // the previous row must lie strictly before the window start in scan direction
  bool prevBeforeStart = (prevTs != INT64_MIN) && (ascending ? (prevTs < startKey) : (prevTs > startKey));

  if (startKey == ts) {
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  } else if (prevBeforeStart) {
    doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, startKey,
                                     RESULT_ROW_START_INTERP);
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  } else {
    setNotInterpoWindowKey(pCtxList, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
  }

  setNotInterpoWindowKey(pCtxList, pQuery->numOfOutput, RESULT_ROW_END_INTERP);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    pCtxList[i].size = 1;
  }
}

/*
 * Interpolate the end key of the time window `win` and flag pResult with
 * RESULT_ROW_END_INTERP.
 *
 * The window end is win->ekey for ascending and win->skey for descending
 * queries.  Afterwards the start key of every function context is marked as
 * "not interpolated" and the size of every context is set to 0.
 */
static void setTimeWindowEKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx* pCtxList = pRuntimeEnv->pCtx;

  TSKEY endKey;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    endKey = win->ekey;
  } else {
    endKey = win->skey;
  }

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, endKey,
                                   RESULT_ROW_END_INTERP);
  setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);

  setNotInterpoWindowKey(pCtxList, pQuery->numOfOutput, RESULT_ROW_START_INTERP);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    pCtxList[k].size = 0;
  }
}

1569
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
H
Haojun Liao 已提交
1570
    SResultRowInfo *pWindowResInfo, SArray *pDataBlock) {
1571
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1572
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1573

1574
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1575
  STableQueryInfo* item = pQuery->current;
H
Haojun Liao 已提交
1576 1577 1578 1579

  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
H
Haojun Liao 已提交
1580 1581
  bool    groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

1582
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1583 1584 1585
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1586

1587 1588
  int16_t type = 0;
  int16_t bytes = 0;
1589

1590
  char *groupbyColumnData = NULL;
H
Haojun Liao 已提交
1591
  if (groupbyColumnValue) {
1592
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
1593
  }
1594

H
Haojun Liao 已提交
1595
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1596
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1597
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1598
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
H
Haojun Liao 已提交
1599
    pCtx[k].size = 1;
1600
  }
1601

1602 1603
  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
1604
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
H
hjxilinx 已提交
1605 1606
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
1607
  }
1608

1609
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1610

1611 1612 1613
  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
1614
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
1615 1616
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }
1617

H
hjxilinx 已提交
1618
  int32_t offset = -1;
H
Haojun Liao 已提交
1619
  TSKEY   prevTs = *(TSKEY*) pRuntimeEnv->prevRow[0];
H
Haojun Liao 已提交
1620
  int32_t prevRowIndex = -1;
1621

1622
  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
H
hjxilinx 已提交
1623
    offset = GET_COL_DATA_POS(pQuery, j, step);
1624

1625
    if (pRuntimeEnv->pTSBuf != NULL) {
1626 1627
      int32_t ret = doTSJoinFilter(pRuntimeEnv, offset);
      if (ret == TS_JOIN_TAG_NOT_EQUALS) {
1628
        break;
1629
      } else if (ret == TS_JOIN_TS_NOT_EQUALS) {
1630 1631
        continue;
      } else {
1632
        assert(ret == TS_JOIN_TS_EQUAL);
1633 1634
      }
    }
1635

1636
    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
1637 1638
      continue;
    }
1639

1640
    // interval window query, decide the time window according to the primary timestamp
H
Haojun Liao 已提交
1641
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1642
      int32_t prevWindowIndex = curTimeWindowIndex(pWindowResInfo);
H
Haojun Liao 已提交
1643
      int64_t ts  = tsCols[offset];
H
Haojun Liao 已提交
1644

1645
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1646

1647 1648 1649
      bool hasTimeWindow  = false;
      SResultRow* pResult = NULL;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult);
1650 1651 1652
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }
1653

1654 1655 1656
      if (!hasTimeWindow) {
        continue;
      }
H
Haojun Liao 已提交
1657

1658 1659
      // window start key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1660 1661 1662 1663 1664 1665
        // check for the time window end time interpolation
        int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
        if (prevWindowIndex != -1 && prevWindowIndex < curIndex) {
          for (int32_t k = prevWindowIndex; k < curIndex; ++k) {
            SResultRow *pRes = pWindowResInfo->pResult[k];

H
Haojun Liao 已提交
1666 1667
            ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &pRes->win, masterScan, &hasTimeWindow, &pResult);
            assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
H
Haojun Liao 已提交
1668

H
Haojun Liao 已提交
1669
            setTimeWindowEKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &pRes->win);
H
Haojun Liao 已提交
1670 1671

            bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1672
            doRowwiseApplyFunctions(pRuntimeEnv, closed, &pRes->win, offset);
H
Haojun Liao 已提交
1673 1674 1675 1676 1677 1678 1679
          }

          // restore current time window
          ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow,
                                        &pResult);
          if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
            continue;
1680 1681
          }
        }
1682

H
Haojun Liao 已提交
1683
        setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &win);
1684
      }
H
Haojun Liao 已提交
1685

1686
      bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
1687
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);
1688

1689 1690
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;
1691

1692
      while (1) {
H
Haojun Liao 已提交
1693
        getNextTimeWindow(pQuery, &nextWin);
1694
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
H
Haojun Liao 已提交
1695
            (nextWin.ekey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
1696 1697
          break;
        }
1698

1699 1700 1701
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }
1702

1703
        // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1704
        hasTimeWindow = false;
1705
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow, &pResult) != TSDB_CODE_SUCCESS) {
1706 1707
          break;
        }
1708

1709
        if (hasTimeWindow) {
H
Haojun Liao 已提交
1710
          setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &nextWin);
1711
          closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
1712
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
1713
        }
1714
      }
1715

1716 1717 1718
      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
H
Haojun Liao 已提交
1719
      if (groupbyColumnValue) {
H
hjxilinx 已提交
1720
        char *val = groupbyColumnData + bytes * offset;
1721

1722
        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, item->groupIndex);
1723 1724 1725 1726
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }
1727

1728
      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1729
        int32_t functionId = pQuery->pExpr1[k].base.functionId;
1730 1731 1732 1733 1734
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }
1735

H
Haojun Liao 已提交
1736 1737
    prevTs = tsCols[offset];
    prevRowIndex = offset;
1738

1739 1740 1741
    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
H
hjxilinx 已提交
1742
        setQueryStatus(pQuery, QUERY_COMPLETED);
1743 1744 1745 1746
        break;
      }
    }
  }
H
Haojun Liao 已提交
1747 1748 1749 1750 1751 1752 1753 1754

  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    item->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

H
Haojun Liao 已提交
1755 1756 1757
  if (pRuntimeEnv->pTSBuf != NULL) {
    item->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }
H
Haojun Liao 已提交
1758

1759 1760
  // todo refactor: extract method
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1761
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1762 1763
      continue;
    }
1764

S
TD-1848  
Shengliang Guan 已提交
1765
    tfree(sasArray[i].data);
1766
  }
1767

1768 1769 1770 1771
  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1772
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1773
  SQuery *pQuery = pRuntimeEnv->pQuery;
1774

H
hjxilinx 已提交
1775
  STableQueryInfo* pTableQInfo = pQuery->current;
H
Haojun Liao 已提交
1776
  SResultRowInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1777

H
Haojun Liao 已提交
1778
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1779
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1780
  } else {
1781
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1782
  }
1783

1784
  // update the lastkey of current table
1785
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1786
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1787

1788
  // interval query with limit applied
1789
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1790
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1791
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
H
Haojun Liao 已提交
1792 1793 1794 1795
  } else if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
    numOfRes = pWindowResInfo->size;
  } else { // projection query
S
TD-1057  
Shengliang Guan 已提交
1796
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1797

1798 1799 1800 1801
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1802

1803 1804 1805
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1806

1807 1808 1809
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1810 1811 1812 1813 1814

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1815
    }
1816
  }
1817

1818
  return numOfRes;
1819 1820
}

H
Haojun Liao 已提交
1821
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1822
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1823

H
Haojun Liao 已提交
1824 1825
  int32_t functionId = pQuery->pExpr1[colIndex].base.functionId;
  int32_t colId = pQuery->pExpr1[colIndex].base.colInfo.colId;
1826

1827
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1828
  pCtx->hasNull = hasNullValue(&pQuery->pExpr1[colIndex].base.colInfo, pStatis, &tpField);
1829
  pCtx->aInputElemBuf = inputData;
1830

1831
  if (tpField != NULL) {
H
Haojun Liao 已提交
1832
    pCtx->preAggVals.isSet  = true;
1833 1834
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1835 1836 1837
  } else {
    pCtx->preAggVals.isSet = false;
  }
1838

H
Haojun Liao 已提交
1839 1840
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1841 1842
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1843

H
Haojun Liao 已提交
1844
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1845 1846
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1847

1848 1849
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1850
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1851
  }
1852

1853 1854 1855 1856 1857
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1858
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1859
    /*
H
Haojun Liao 已提交
1860
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
H
Haojun Liao 已提交
1861
     * timestamp column, and the y-value is the column specified in pQuery->pExpr1[i].colIdxInBuffer
1862 1863 1864 1865 1866
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
1867 1868 1869 1870
       pCtx->param[1].i64Key = pQuery->window.skey;
       pCtx->param[1].nType = TSDB_DATA_TYPE_BIGINT;
       pCtx->param[2].i64Key = pQuery->window.ekey;
       pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
1871
    }
1872

1873 1874
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1875 1876 1877 1878 1879 1880
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1881
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1882 1883 1884
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1885
    pInterpInfo->type = (int8_t)pQuery->fillType;
1886 1887
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1888

1889 1890 1891 1892
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1893 1894 1895
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1896 1897
      }
    }
H
Haojun Liao 已提交
1898 1899 1900
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1901
  }
1902

1903 1904 1905 1906 1907 1908
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1909
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1910 1911 1912
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1913
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1914 1915 1916 1917 1918 1919
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1920
/*
 * For a "selectivity + tags" query, attach the list of tag/ts dummy contexts
 * to the selectivity function context.
 *
 * Every context whose function is TSDB_FUNC_TAG_DUMMY or TSDB_FUNC_TS_DUMMY is
 * collected; the list, its length and the summed output bytes are stored in
 * the tagInfo of the last context whose function has the
 * TSDB_FUNCSTATE_SELECTIVITY status.  If there is no such context the list is
 * released again.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if the list cannot
 * be allocated.
 */
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!isSelectivityWithTagsQuery(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  SQLFunctionCtx **pTagCtxList = calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pTagCtxList == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  int32_t         numOfTags = 0;
  int16_t         totalTagBytes = 0;
  SQLFunctionCtx *pSelectivityCtx = NULL;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFuncMsg = &pQuery->pExpr1[k].base;

    bool isDummy = (pFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY) || (pFuncMsg->functionId == TSDB_FUNC_TS_DUMMY);
    if (isDummy) {
      totalTagBytes += pCtx[k].outputBytes;
      pTagCtxList[numOfTags++] = &pCtx[k];
    } else if ((aAggs[pFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pSelectivityCtx = &pCtx[k];
    }

    // everything else (ts, tag, projected normal columns, ...) needs no handling here
  }

  if (pSelectivityCtx == NULL) {
    tfree(pTagCtxList);
  } else {
    pSelectivityCtx->tagInfo.pTagCtxList = pTagCtxList;
    pSelectivityCtx->tagInfo.numOfTagCols = numOfTags;
    pSelectivityCtx->tagInfo.tagsLen = totalTagBytes;
  }

  return TSDB_CODE_SUCCESS;
}

1961
/*
 * Allocate and initialize the per-output function contexts of the runtime
 * environment.
 *
 * For every output expression the input/output type and size, the scan order,
 * the function id and the function parameters are copied into the context,
 * and the offsets of the output column and of the result cell info are
 * computed as running sums over the preceding expressions.
 *
 * order is stored in param[2] of top/bottom/diff contexts.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 * fails or setCtxTagColumnInfo() reports an error.  On failure the arrays
 * allocated here are released; the function parameters already created for
 * the contexts are destroyed first so that they are not lost together with
 * the context array.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->offset = calloc(pQuery->numOfOutput, sizeof(int16_t));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));

  if (pRuntimeEnv->offset == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the "require null" request is moved from the column flag into the context
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // input type/size: table name pseudo column, tag column, user defined column or normal column
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes  = pQuery->pExpr1[i].bytes;
    pCtx->outputType   = pQuery->pExpr1[i].type;

    pCtx->order        = pQuery->order.order;
    pCtx->functionId   = pSqlFuncMsg->functionId;
    pCtx->stableQuery  = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pExpr1[i].interBytes;
    pCtx->start.key    = INT64_MIN;
    pCtx->end.key      = INT64_MIN;

    pCtx->numOfParams  = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pExpr1[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // running offsets: output column offset and result cell info offset
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pExpr1[i - 1].interBytes;
    }

  }

  *(int64_t*) pRuntimeEnv->prevRow[0] = INT64_MIN;

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // destroy the parameters created above before the context array goes away;
  // contexts that were never initialized still have numOfParams == 0 (calloc)
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  tfree(pRuntimeEnv->pCtx);
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087
/*
 * Clean up both tsdb query handles of the query and reset the handle pointers.
 * The memory reference of the query is asserted to be fully released
 * afterwards.
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);

  pEnv->pSecQueryHandle = NULL;
  pEnv->pQueryHandle = NULL;

  SMemRef* pRef = &pQInfo->memRef;
  assert(pRef->ref == 0 && pRef->imem == NULL && pRef->mem == NULL);
}

2088 2089 2090 2091
/*
 * Release everything owned by the runtime environment.
 * Nothing is done when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  qDebug("QInfo:%p teardown runtime env", pQInfo);
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  // function contexts: parameters, tag value and tag context list
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

      for (int32_t p = 0; p < pFuncCtx->numOfParams; ++p) {
        tVariantDestroy(&pFuncCtx->param[p]);
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->pCtx);
  }

  // fill info, result buffer, tsdb query handles and ts buffer
  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);
  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);
  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);

  // plain buffers
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);
  tfree(pRuntimeEnv->prevRow);

  // result row hash table and result row pool
  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

2132 2133 2134 2135
/* Returns true when a response context is attached to the query (rspContext is not NULL). */
static bool needBuildResAfterQueryComplete(SQInfo* pQInfo) {
  if (pQInfo->rspContext == NULL) {
    return false;
  }

  return true;
}

H
Haojun Liao 已提交
2136
// true when the code field of _q equals TSDB_CODE_TSC_QUERY_CANCELLED
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
2137

2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156
/*
 * Check whether the execution of the query has to be aborted.
 *
 * Returns true when
 *   - the query has been flagged as cancelled (IS_QUERY_KILLED), or
 *   - the query has an owner, it has been executing for longer than
 *     getMaximumIdleDurationSec() seconds, and no response has to be built
 *     after the query completes.
 * Returns false otherwise.
 */
static bool isQueryKilled(SQInfo *pQInfo) {
  if (IS_QUERY_KILLED(pQInfo)) {
    return true;
  }

  // query has been executed more than the maximum idle duration, and the retrieve has not arrived
  // abort current query execution.
  if (pQInfo->owner != 0 && ((taosGetTimestampSec() - pQInfo->startExecTs) > getMaximumIdleDurationSec()) &&
      (!needBuildResAfterQueryComplete(pQInfo))) {

    assert(pQInfo->startExecTs != 0);

    // report the threshold that was actually applied instead of a fixed value
    qDebug("QInfo:%p retrieve not arrive beyond %d sec, abort current query execution, start:%"PRId64", current:%d", pQInfo,
           (int32_t)getMaximumIdleDurationSec(), pQInfo->startExecTs, taosGetTimestampSec());
    return true;
  }

  return false;
}

H
Haojun Liao 已提交
2157
/* Flag the query as cancelled; IS_QUERY_KILLED() reports true for it afterwards. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
2158

H
Haojun Liao 已提交
2159 2160 2161
/*
 * Decide whether the query is treated as a fixed-output query.
 *
 * Interval queries never are.  Top/bottom queries and group-by-normal-column
 * queries always are.  Otherwise the query is fixed-output as soon as one
 * output function is not a multi-output function; a leading projection of the
 * primary timestamp column with exactly one parameter and the ts/ts_dummy
 * functions are ignored for that decision.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFuncMsg = &pQuery->pExpr1[k].base;

    // ignore the ts_comp function
    bool leadingTsProjection = (k == 0) && (pFuncMsg->functionId == TSDB_FUNC_PRJ) && (pFuncMsg->numOfParams == 1) &&
                               (pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    if (leadingTsProjection) {
      continue;
    }

    bool tsFunction = (pFuncMsg->functionId == TSDB_FUNC_TS) || (pFuncMsg->functionId == TSDB_FUNC_TS_DUMMY);
    if (tsFunction) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFuncMsg->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

2191
// todo refactor with isLastRowQuery
2192
/* Returns true if at least one output expression uses TSDB_FUNC_INTERP. */
bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t k = 0;

  while (k < pQuery->numOfOutput) {
    if (pQuery->pExpr1[k].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }

    ++k;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
2204
/*
 * Returns true if at least one output expression is one of the
 * sum_rate/sum_irate/avg_rate/avg_irate functions.
 */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t funcId = pQuery->pExpr1[k].base.functionId;

    if (funcId == TSDB_FUNC_TS) {
      continue;
    }

    switch (funcId) {
      case TSDB_FUNC_SUM_RATE:
      case TSDB_FUNC_SUM_IRATE:
      case TSDB_FUNC_AVG_RATE:
      case TSDB_FUNC_AVG_IRATE:
        return true;
      default:
        break;
    }
  }

  return false;
}

H
hjxilinx 已提交
2220
/* Returns true if at least one output expression uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool found = false;

  for (int32_t k = 0; k < pQuery->numOfOutput && !found; ++k) {
    found = (pQuery->pExpr1[k].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return found;
}

H
hjxilinx 已提交
2231
/*
 * Decide whether an additional scan in the reverse direction is required.
 *
 * A reverse scan is needed when
 *  - a first()/first_dst() expression exists and the query is not an ascending query, or
 *  - a last()/last_dst() expression exists whose scan order (stored in the first
 *    argument of the expression) differs from the order of the query.
 * ts/ts_dummy/tag expressions are ignored.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SExprInfo *pExpr = &pQuery->pExpr1[k];
    const int32_t fid = pExpr->base.functionId;

    const bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (skipped) {
      continue;
    }

    const bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    const bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the scan order to acquire the last result of the specified column
      int32_t lastOrder = (int32_t)pExpr->base.arg->argValue.i64;
      if (lastOrder != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
2253

H
Haojun Liao 已提交
2254 2255 2256 2257
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
2258 2259
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2260
    SExprInfo* pExprInfo = &pQuery->pExpr1[i];
H
Haojun Liao 已提交
2261 2262

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
2263 2264 2265 2266

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
2267
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
2268 2269 2270
      return false;
    }
  }
2271

H
hjxilinx 已提交
2272 2273 2274
  return true;
}

2275 2276
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
2277
/*
 * Compute the time window that contains key, aligned by the interval of the query.
 *
 * win->skey is obtained by truncating key with taosTimeTruncate(); win->ekey is the last
 * timestamp covered by the window:
 *  - INT64_MAX if keyFirst is so large that adding one interval would exceed INT64_MAX,
 *  - skey + one interval - 1 computed by taosTimeAdd() for the 'n' and 'y' interval units,
 *  - skey + interval - 1 otherwise.
 *
 * Asserts that keyFirst <= key <= keyLast and that sliding <= interval.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * if the realSkey > INT64_MAX - pQuery->interval.interval, the query duration between
   * realSkey and realEkey must be less than one interval. Therefore, no need to adjust the query ranges.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  const bool useTimeAdd = (pQuery->interval.intervalUnit == 'n') || (pQuery->interval.intervalUnit == 'y');
  if (useTimeAdd) {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column queries.
 * Otherwise it is 1 only if at least one non-ts expression exists and every
 * non-ts expression is a multi-output function (IS_MULTIOUTPUT on its nStatus).
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[k].base;

    // timestamp helper columns do not influence the decision
    if (pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!multiOutput) {
      break;  // one single-output function is enough to disable the check
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
2321
bool colIdCheck(SQuery *pQuery) {
2322 2323
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
2324
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
2325
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
2326 2327 2328
      return false;
    }
  }
2329

2330 2331 2332 2333 2334 2335
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Check that every output expression, apart from the ts/ts_dummy/tag/tag_dummy
 * helper expressions, uses either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    const int32_t fid = pQuery->pExpr1[k].base.functionId;

    const bool helperExpr = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                            (fid == TSDB_FUNC_TAG_DUMMY);

    if (!helperExpr && fid != functId && fid != functIdDst) {
      return false;
    }
  }

  return true;
}

/* True if all relevant output expressions are first()/first_dst(), see onlyOneQueryType(). */
static bool onlyFirstQuery(SQuery *pQuery) {
  const bool onlyFirst = onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
  return onlyFirst;
}

/* True if all relevant output expressions are last()/last_dst(), see onlyOneQueryType(). */
static bool onlyLastQuery(SQuery *pQuery) {
  const bool onlyLast = onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
  return onlyLast;
}

H
Haojun Liao 已提交
2356
// todo refactor, add iterator
2357 2358
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
2359
  for(int32_t i = 0; i < t; ++i) {
2360
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
2361 2362 2363

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
2364
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
2365

2366 2367 2368 2369
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
2370 2371 2372 2373
    }
  }
}

2374
/*
 * Adjust the scan order (and, if required, swap the query time range) according to
 * the kind of query:
 *
 *  1. last_row query: always ascending; the range is swapped if skey > ekey.
 *  2. group by normal column in descending order: switched to ascending, the range is
 *     swapped if skey > ekey and the lastKey of the tables is exchanged.
 *  3. interp query without interval: always ascending.
 *  4. query without interval, or interval query on a super table (stableQuery):
 *       - only first()/first_dst(): ascending
 *       - only last()/last_dst():   descending
 *     If the order changes, the range is swapped and the lastKey of the tables exchanged.
 *  5. interval query on a normal table: left unchanged.
 *
 * pQueryMsg is not used by this function.
 *
 * The debug logs print pQInfo directly. GET_QINFO_ADDR() must not be applied to pQuery,
 * since the macro subtracts offsetof(SQInfo, runtimeEnv) and thus only yields the QInfo
 * address when it is given the address of the runtime environment.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case that the order irrelevant query type is mixed up with the order critical query type
  // last_row query is always executed in ascending order
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  // an interval query on a normal table keeps its scan order
  const bool intervalQuery = (pQuery->interval.interval != 0);
  if (intervalQuery && !stableQuery) {
    return;
  }

  if (onlyFirstQuery(pQuery)) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, intervalQuery ? "only-first stable" : "only-first", pQuery->order.order, TSDB_ORDER_ASC,
             pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
      doExchangeTimeWindow(pQInfo, &pQuery->window);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
  } else if (onlyLastQuery(pQuery)) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, intervalQuery ? "only-last stable" : "only-last", pQuery->order.order, TSDB_ORDER_DESC,
             pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
      doExchangeTimeWindow(pQInfo, &pQuery->window);
    }

    pQuery->order.order = TSDB_ORDER_DESC;
  }
}

/*
 * Return the initial number of result pages:
 *  - 128 for a group-by-normal-column query,
 *  - max(number of tables, 16) for an interval query,
 *  - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  // for super table query, one page for each subset: pQInfo->pSidSet->numOfSubSet;
  int32_t numOfPages = 1;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = (int32_t)(MAX(numOfTables, minPagesForInterval));
  }

  assert(numOfPages > 0);
  return numOfPages;
}

2484 2485
/*
 * Compute the row size and the page size of the intermediate result buffer.
 *
 *  *rowsize: pQuery->rowSize multiplied by GET_ROW_PARAM_FOR_MULTIOUTPUT().
 *  *ps:      DEFAULT_INTERN_BUF_PAGE_SIZE, doubled until four rows fit into the page
 *            besides the tFilePage header.
 *
 * pRuntimeEnv->numOfRowsPerPage is updated accordingly and asserted not to exceed
 * MAX_ROWS_PER_RESBUF_PAGE.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t minRowsPerPage = 4;
  int32_t pageHeaderSize = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // one page contains at least minRowsPerPage rows
  *ps = DEFAULT_INTERN_BUF_PAGE_SIZE;
  while (((*rowsize) * minRowsPerPage) > (*ps) - pageHeaderSize) {
    *ps = (*ps << 1u);
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2501
// the pre-filter on block statistics is applied to every data type except binary and nchar
#define IS_PREFILTER_TYPE(_t) (!((_t) == TSDB_DATA_TYPE_BINARY || (_t) == TSDB_DATA_TYPE_NCHAR))
2502

H
Haojun Liao 已提交
2503 2504 2505 2506
/*
 * Decide, based on the pre-computed statistics of a data block, whether the block
 * has to be loaded.
 *
 * Returns true (load the block) if
 *  - no statistics are available, or there is neither a filter column nor a top/bottom query,
 *  - the statistics of a filtered column are missing,
 *  - a filtered column is of binary/nchar type (no pre-filter support),
 *  - all values of a filtered column are NULL and one of its filters is isNull_filter,
 *  - any filter function accepts the [min, max] range of its column.
 * For a top/bottom query that was not accepted before, the result of
 * topbot_datablock_filter() for the first top/bottom expression is returned.
 * Otherwise false is returned and the block can be discarded.
 *
 * NOTE(review): for float columns the min/max statistics are read through a double*;
 * presumably the statistics store floating point values as double -- confirm.
 * NOTE(review): the top/bottom branch indexes pDataStatis with the output expression
 * index rather than a column index looked up by column id -- verify this is intended.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery)) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    // locate the statistics entry of the filtered column
    int32_t slot = 0;
    while (slot < pQuery->numOfCols && pDataStatis[slot].colId != pFilterInfo->info.colId) {
      ++slot;
    }

    // no statistics data, load the true data block
    if (slot >= pQuery->numOfCols) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
      return true;
    }

    SDataStatis* pColStatis = &pDataStatis[slot];

    // all data in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        if (pFilterInfo->pFilters[j].fp == isNull_filter) {
          return true;
        }
      }

      continue;
    }

    // choose the boundary values handed over to the filter functions
    float fMin = 0;
    float fMax = 0;
    char *pMin = NULL;
    char *pMax = NULL;

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
      fMin = (float)(*(double *)(&pColStatis->min));
      fMax = (float)(*(double *)(&pColStatis->max));
      pMin = (char *)&fMin;
      pMax = (char *)&fMax;
    } else {
      pMin = (char *)&pColStatis->min;
      pMax = (char *)&pColStatis->max;
    }

    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      if (pFilterInfo->pFilters[j].fp(&pFilterInfo->pFilters[j], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t fid = pQuery->pExpr1[i].base.functionId;
      if (fid != TSDB_FUNC_TOP && fid != TSDB_FUNC_BOTTOM) {
        continue;
      }

      return topbot_datablock_filter(&pCtx[i], fid, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
2577 2578 2579 2580 2581 2582 2583 2584
/*
 * Check whether a time window boundary falls inside the data block, i.e. whether
 * the block is not completely contained in one single time window.
 *
 * Ascending order: the window containing the first key of the block is computed;
 * true is returned if that window ends before the end of the block or if one of the
 * following windows starts inside the block.
 * Descending order: the same check is performed starting from the last key of the
 * block and moving through the windows returned by getNextTimeWindow().
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, sk, ek, &w);
    assert(w.ekey >= pBlockInfo->window.skey);

    if (w.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    // visit the following windows as long as they start no later than the end of the block
    for (getNextTimeWindow(pQuery, &w); w.skey <= pBlockInfo->window.ekey; getNextTimeWindow(pQuery, &w)) {
      assert(w.ekey > pBlockInfo->window.ekey);

      if (w.skey > pBlockInfo->window.skey) {
        return true;
      }
    }

    return false;
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, sk, ek, &w);
  assert(w.skey <= pBlockInfo->window.ekey);

  if (w.skey > pBlockInfo->window.skey) {
    return true;
  }

  // visit the following windows as long as they end no earlier than the start of the block
  for (getNextTimeWindow(pQuery, &w); w.ekey >= pBlockInfo->window.skey; getNextTimeWindow(pQuery, &w)) {
    assert(w.skey < pBlockInfo->window.skey);

    if (w.ekey < pBlockInfo->window.ekey) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
2626
/*
 * Load the statistics and/or the data of the current block only if they are required.
 *
 * The required level is stored in *status:
 *  - BLK_DATA_ALL_NEEDED if the query has filter columns or a ts buffer, or if it is an
 *    interval query and the block is not contained in one single time window,
 *  - otherwise the combination of the requirements reported by the dataReqFunc of
 *    every output expression.
 *
 * Depending on *status:
 *  - BLK_DATA_NO_NEEDED:     nothing is loaded, the block is counted as discarded.
 *  - BLK_DATA_STATIS_NEEDED: the block statistics are loaded into *pStatis; if they do not
 *                            exist, the data block is loaded into *pDataBlock instead.
 *  - BLK_DATA_ALL_NEEDED:    statistics and data block are loaded; if needToLoadDataBlock()
 *                            rejects the block, *status is set to BLK_DATA_DISCARD.
 *
 * Returns TSDB_CODE_SUCCESS, or terrno if the data block could not be retrieved.
 *
 * NOTE(review): in the BLK_DATA_ALL_NEEDED branch the data block is still retrieved after
 * the block has been marked BLK_DATA_DISCARD; the caller visible in this file ignores the
 * block in that case, so the load looks avoidable -- confirm with all callers.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        SResultRow* pResult = NULL;

        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow, &pResult) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every output expression, stop once all data is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pExpr1[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // neither statistics nor data are available: report the failure like the branch below does
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2713
/*
 * Binary search of key in the list of num timestamps stored at pValue.
 *
 * order == TSDB_ORDER_DESC: returns the position of key, or of the last entry that is
 *   smaller than key; -1 if key is smaller than the first entry.
 * order == TSDB_ORDER_ASC:  returns the position of key, or of the first entry that is
 *   bigger than key; -1 if key is bigger than the last entry.
 *
 * Returns -1 if num <= 0. Asserts that order is one of the two values above.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keyList = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  const bool descending = (order == TSDB_ORDER_DESC);

  for (;;) {
    // handle the keys located at or beyond the boundary of the current range
    if (descending) {
      if (key >= keyList[hi]) {
        return hi;
      }

      if (key == keyList[lo]) {
        return lo;
      }

      if (key < keyList[lo]) {
        return lo - 1;
      }
    } else {
      if (key <= keyList[lo]) {
        return lo;
      }

      if (key == keyList[hi]) {
        return hi;
      }

      if (key > keyList[hi]) {
        hi = hi + 1;
        return (hi >= num) ? -1 : hi;
      }
    }

    // bisect the remaining range
    int32_t span = hi - lo + 1;
    int32_t mid = (span >> 1) + lo;

    if (key < keyList[mid]) {
      hi = mid - 1;
    } else if (key > keyList[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2776 2777 2778 2779 2780 2781 2782 2783
/*
 * Resize the output buffer of every output column so that it can hold capacity rows.
 *
 * Nothing is done if capacity is smaller than the current capacity. On allocation
 * failure the function does not return: it jumps to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY. The output position of every function context is reset
 * to the beginning of its (possibly moved) buffer.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity may exceed the range of int32_t
    size_t bufSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], bufSize);
    if (tmp == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2804
// TODO merge with enuserOutputBufferSimple
2805 2806 2807
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  SQuery* pQuery = pRuntimeEnv->pQuery;
2808
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol && !isFixedOutputQuery(pRuntimeEnv) && !isTSCompQuery(pQuery)) {
2809
    SResultRec *pRec = &pQuery->rec;
2810

2811
    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
S
TD-1057  
Shengliang Guan 已提交
2812 2813
      int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
      int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));
2814

2815
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2816
        int32_t bytes = pQuery->pExpr1[i].bytes;
H
Haojun Liao 已提交
2817 2818
        assert(bytes > 0 && newSize > 0);

2819
        char *tmp = realloc(pQuery->sdata[i], bytes * newSize + sizeof(tFilePage));
H
Haojun Liao 已提交
2820
        if (tmp == NULL) {
H
Haojun Liao 已提交
2821
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
2822
        } else {
2823
          memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
2824 2825
          pQuery->sdata[i] = (tFilePage *)tmp;
        }
H
Haojun Liao 已提交
2826

2827 2828
        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;
H
Haojun Liao 已提交
2829

H
Haojun Liao 已提交
2830
        int32_t functionId = pQuery->pExpr1[i].base.functionId;
2831 2832 2833 2834
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }
H
Haojun Liao 已提交
2835

2836
      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
2837
             newSize, pRec->capacity, newSize - pRec->rows);
2838

2839 2840 2841 2842 2843
      pRec->capacity = newSize;
    }
  }
}

2844 2845 2846 2847 2848
/*
 * Initialize startTime/prevSKey of the window result info from the first data block.
 *
 * Only done for an interval query whose prevSKey is still TSKEY_INITIAL_VAL. The window
 * is aligned with getAlignQueryTimeWindow(), using the first key of the block for an
 * ascending query and the last key of the block for a descending query.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->windowResInfo.prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow w = TSWINDOW_INITIALIZER;
  SResultRowInfo *pResInfo = &pRuntimeEnv->windowResInfo;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &w);
    pResInfo->startTime = w.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &w);
    pResInfo->startTime = pQuery->window.skey;
  }

  pResInfo->prevSKey = w.skey;
}

2865 2866
/*
 * Scan the data blocks delivered by the query handle of the current scan and apply the
 * query functions on each of them.
 *
 * For every block: the initial time window is set (interval query), the output buffer is
 * enlarged if required, the block is loaded on demand and, unless it was discarded,
 * processed by tableApplyFunctionsOnBlock(). The scan stops when the result buffer is
 * full, the query is completed, or loading a block failed.
 *
 * The function jumps to pRuntimeEnv->env if the query was killed or terrno is set after
 * the scan. If the result buffer is not full, the query is marked as completed; for an
 * interval query in master/repeat scan all time windows are closed in that case.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pBlockStatis = NULL;
    SArray      *pBlockData = NULL;
    uint32_t     loadStatus = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQueryHandle, &blockInfo,
                                         &pBlockStatis, &pBlockData, &loadStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (loadStatus == BLK_DATA_DISCARD) {
      // skip the whole block: move the last key just beyond the block in scan direction
      pQuery->current->lastKey = (QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey) + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pBlockStatis, binarySearchForKey, pBlockData);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  const bool closeWindows =
      QUERY_IS_INTERVAL_QUERY(pQuery) && (IS_MASTER_SCAN(pRuntimeEnv) || pRuntimeEnv->scanFlag == REPEAT_SCAN);

  if (closeWindows) {
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    } else {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2945
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2946
  tVariantDestroy(tag);
2947

2948
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2949
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2950
    assert(val != NULL);
2951

H
[td-90]  
Haojun Liao 已提交
2952
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2953
  } else {
2954
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2955 2956 2957 2958
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2959

H
hjxilinx 已提交
2960
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2961
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2962 2963 2964 2965
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2966
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2967
    } else {
H
Haojun Liao 已提交
2968 2969 2970 2971 2972
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2973
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2974
    }
2975
  }
2976 2977
}

2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989
/*
 * Linear search of the tag column list for the entry whose colId matches.
 *
 * The list must be non-NULL and hold at least one entry (asserted).
 * Returns a pointer into pTagColList, or NULL when no entry carries colId.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo *pCur = pTagColList;
  SColumnInfo *pEnd = pTagColList + numOfTags;

  while (pCur < pEnd) {
    if (pCur->colId == colId) {
      return pCur;
    }

    ++pCur;
  }

  return NULL;
}

2990
/*
 * Load the tag values of pTable into the function contexts of the running query.
 *
 *  - A query whose only output is ts_comp loads just the tag column named by the
 *    first parameter of that expression into pCtx[0].tag.
 *  - Otherwise every output expression flagged as a tag column gets its own tag value,
 *    and, when a TS buffer exists and the first expression is ts/prj on the primary
 *    timestamp column, the tag named by its parameter is loaded into pCtx[0].tag as well.
 *
 * NOTE(review): the result of doGetTagColumnInfoById() is dereferenced without a NULL
 * check; this relies on the tag column id always being present in pQuery->tagColList.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];

  // single ts_comp output: only the tag referenced by its parameter is required
  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      colId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pTagCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, colId);

    doSetTagValueInParam(tsdb, pTable, colId, &pRuntimeEnv->pCtx[0].tag, pTagCol->type, pTagCol->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SExprInfo *pExpr = &pQuery->pExpr1[k];

    // only expressions bound to a tag column carry a tag value
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[k].tag, pExpr->type,
                           pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pMsg = &pFirstExpr->base;

  bool isTsOrPrj = (pMsg->functionId == TSDB_FUNC_TS) || (pMsg->functionId == TSDB_FUNC_PRJ);
  if (!isTsOrPrj || pRuntimeEnv->pTSBuf == NULL || pMsg->colInfo.colIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return;
  }

  assert(pMsg->numOfParams == 1);

  int16_t      joinColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
  SColumnInfo *pJoinCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinColId);

  doSetTagValueInParam(tsdb, pTable, joinColId, &pRuntimeEnv->pCtx[0].tag, pJoinCol->type, pJoinCol->bytes);

  // the log format depends on whether the loaded tag is a string or a numeric value
  int16_t loadedType = pRuntimeEnv->pCtx[0].tag.nType;
  if (loadedType == TSDB_DATA_TYPE_BINARY || loadedType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pFirstExpr->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pFirstExpr->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64Key);
  }
}

H
Haojun Liao 已提交
3040
/*
 * Feed one per-table result row into the merge contexts.
 *
 *   timestamp  : stored in every context as nStartQueryTimestamp
 *   pWindowRes : the result row whose cells become the input of each context
 *   mergeFlag  : true  -> accumulate into the current output row;
 *                false -> advance every output buffer by one row and re-initialize the
 *                         aggregate before merging
 *
 * Pass 1 prepares every context (and rebuilds the tag variant for tag/tag_dummy outputs);
 * pass 2 invokes distMergeFunc for every output except tag_dummy.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SResultRow *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  tFilePage      *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];
    int32_t         fid = pQuery->pExpr1[col].base.functionId;

    if (!mergeFlag) {
      // start a new output row
      pc->aOutputBuf += pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[fid].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes, page);

    if (fid != TSDB_FUNC_TAG_DUMMY && fid != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pc->tag);

    int32_t outType = pc->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;

    if (fid != TSDB_FUNC_TAG_DUMMY) {
      aAggs[fid].distMergeFunc(&pCtx[col]);
    }
  }
}

3084
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

3153
/*
 * Debug helper: dump numOfRows rows of the intermediate super-table result to stdout.
 *
 *   pdata       : one page per output column; row j of column i starts at
 *                 pdata[i]->data + pExpr1[i].bytes * j
 *   pRuntimeEnv : supplies the query and therefore the output column definitions
 *   numOfRows   : number of rows to print
 *
 * Columns whose type is not handled by the switch print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pExpr1[col];
      char      *cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          // binary cells hold function-specific intermediate state
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, cell, type);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
3188 3189 3190
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
3191 3192 3193 3194 3195
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
3196

3197 3198
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
3199

3200 3201
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
3202

3203 3204 3205 3206
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
3207

3208 3209 3210 3211
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
3212

H
Haojun Liao 已提交
3213
  SResultRowInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
3214
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
3215
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId);
3216

H
Haojun Liao 已提交
3217
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
3218
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
3219

H
Haojun Liao 已提交
3220
  SResultRowInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
3221
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
3222
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId);
3223

H
Haojun Liao 已提交
3224
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
3225
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
3226

3227 3228 3229
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
3230

3231 3232 3233
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

3234
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group yields result
 * pages or all groups have been visited.
 *
 * groupIndex is advanced past every processed group. The super table query is marked
 * as over when all groups are consumed and no unread page is left. The elapsed time is
 * added to summary.firstStageMergeTime.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  const int64_t startUs = taosGetTimestampUs();
  const int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  int32_t code = TSDB_CODE_SUCCESS;

  for (; pQInfo->groupIndex < numOfGroups;) {
    code = mergeIntoGroupResultImpl(pQInfo, GET_TABLEGROUP(pQInfo, pQInfo->groupIndex));
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    if (code > 0) {
      // this group generates at least one result, return results
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo *pResInfo = &pQInfo->groupResInfo;

  bool allGroupsDone = (pQInfo->groupIndex == numOfGroups);
  if (allGroupsDone && pResInfo->pageId == pResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the disk-based result buffer into the output buffers
 * of the query (pQuery->sdata), at most pQuery->rec.capacity rows per call.
 *
 * The read cursor lives in pQInfo->groupResInfo (pageId, rowId). When every page of the
 * current group has been consumed, the next group is merged first; if no group is left
 * the super table query is marked as over and nothing is copied.
 *
 * A page that does not fit completely is consumed partially and resumed on the next call.
 * The copy therefore starts at the saved rowId inside the page and covers only the rows
 * that are still unread. Starting at row 0, or copying the full page size, would return
 * rows twice and could write past the destination buffer.
 *
 * On return pQuery->rec.rows holds the number of rows copied.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;

  // TODO add API for release none-dirty pages, so that a fully consumed page can be released here
  for (int32_t j = pGroupResInfo->pageId; j < (int32_t)size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->rowId < pData->num);

    // first unread row of this page and the number of unread rows it still holds
    int32_t startRow = (int32_t)pGroupResInfo->rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);
    int32_t room = (int32_t)(pQuery->rec.capacity - offset);

    if (numOfRes > room) {
      // output buffer is full: consume part of the page and remember where to resume
      numOfCopiedRows = room;
      pGroupResInfo->rowId += numOfCopiedRows;
      done = true;
    } else {
      // the rest of the page fits, move the cursor to the next page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pageId += 1;
      pGroupResInfo->rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page, rows are packed with stride 'bytes'
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + startRow * bytes;
      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

3356 3357 3358
/*
 * Report how many results a result row holds.
 *
 * The first output column that is not a ts/tag/tagprj function and whose cell reports
 * numOfRes > 0 decides the answer. Returns 0 when no such column exists.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool isAuxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
    if (isAuxiliary) {
      continue;
    }

    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResultRow, col);
    assert(pCell != NULL);

    if (pCell->numOfRes > 0) {
      return pCell->numOfRes;
    }
  }

  return 0;
}

3381
/*
 * Merge the per-table results of one table group into group-level result pages.
 *
 *   pQInfo : query whose groupResInfo receives the description of the produced pages
 *   pGroup : array of STableQueryInfo pointers that make up the group
 *
 * Only tables that own result pages and at least one result row take part.
 *   - no such table : nothing to do, returns 0
 *   - one such table: its pages are used directly (groupId = table id), no merge
 *   - otherwise     : rows are merged in timestamp order through a loser tree; rows with
 *                     equal timestamps are combined, and the output is flushed into new
 *                     pages each time the output buffer is full
 *
 * Returns the number of result pages of the group, or -1 when flushing fails.
 * Leaves through longjmp(pRuntimeEnv->env, ...) on allocation failure or when the query
 * has been killed. The temporary arrays and the loser tree are released on every exit
 * path, including the flush failure inside the merge loop.
 *
 * NOTE(review): the row obtained from getNewResultRow() is not handed back here; confirm
 * that the pool owns it.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that actually produced results
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    tfree(posList);
    tfree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {
    // the tree could not be built, it must not be dereferenced below
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  SResultRow* pRow = getNewResultRow(pRuntimeEnv->pool);
  resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);

  pQInfo->groupResInfo.groupId = getMergeResultGroupId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (isQueryKilled(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      tfree(pTableList);
      tfree(posList);
      tfree(pTree);
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the winner of the loser tree is the source to consume next
    int32_t pos = pTree->pNode[0].index;

    SResultRowInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SResultRow  *pWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num <= 0) {
      // empty row: skip it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

            // release the merge state before giving up
            tfree(pTree);
            tfree(pTableList);
            tfree(posList);
            return -1;
          }

          resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SResultRow  *pNextWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      tfree(pTree);
      tfree(pTableList);
      tfree(posList);
      return -1;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  tfree(pTableList);
  tfree(posList);
  tfree(pTree);

  return pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3550 3551
/*
 * Move the rows accumulated in the output buffers (pQuery->sdata) into new pages of the
 * disk-based result buffer, registered under pGroupResInfo->groupId.
 *
 * Rows are written page by page, at most pResultBuf->numOfRowsPerPage rows each, and
 * pGroupResInfo->numOfDataPages is incremented for every page written.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when no new page could be
 * obtained (pages written before the failure stay registered).
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // receives the id of each newly created page
  int32_t pageId = -1;
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  int32_t remain = (int32_t) pQuery->sdata[0]->num;
  int32_t offset = 0;  // rows already flushed

  while (remain > 0) {
    int32_t rows = (remain > capacity)? capacity:remain;
    assert(rows > 0);

    // get the output buffer page
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    if (buf == NULL) {
      // callers treat any value other than TSDB_CODE_SUCCESS as a failed flush
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = rows;

    // pagewisely copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      // column i occupies the region starting at offset[i] * numOfRowsPerPage inside the page
      char* output = buf->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage;
      char* src = ((char *) pQuery->sdata[i]->data) + offset * bytes;
      memcpy(output, src, (size_t)(buf->num * bytes));
    }

    offset += rows;
    remain -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3588 3589 3590
/*
 * Rewind the merge output: every context is re-bound to the start of its output column
 * and to its cell in pRow, and every output page is marked empty.
 *
 * aOutputBuf is deliberately positioned one row BEFORE the start of the column data,
 * because doMerge() advances it by outputBytes before the first row is written.
 */
void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];

    pc->aOutputBuf = pQuery->sdata[col]->data - pc->outputBytes;
    pc->size = 1;
    pc->startOffset = 0;
    pc->resultInfo = getResultCell(pRuntimeEnv, pRow, col);

    pQuery->sdata[col]->num = 0;
  }
}

3601 3602 3603 3604
/*
 * Turn the scan range of one table around for the reverse scan.
 *
 * If lastKey differs from win.skey, the window end is first moved to lastKey + step.
 * Then skey/ekey are swapped, lastKey restarts at the new skey, the cursor order is
 * switched and its vgroupIndex reset, and the current window index is placed on the
 * last time window. A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // NOTE: even win.skey != lastKey, the results may not generated.
  // when both are equal there are no results and the window is left untouched
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

H
Haojun Liao 已提交
3624
/*
 * Adjust the 'complete' flag of every cell in every CLOSED result row before a reverse scan.
 *
 *   first/first_dst with ascending order, last/last_dst with descending order -> complete = false
 *   ts and tag                                                                -> left unchanged
 *   every other function                                                      -> complete = true
 *
 * NOTE(review): presumably a cell marked complete is skipped during the reverse scan;
 * confirm against the aggregate implementations.
 */
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t rowIdx = 0; rowIdx < pWindowResInfo->size; ++rowIdx) {
    // rows that are still open are not touched
    if (!getResultRowStatus(pWindowResInfo, rowIdx)) {
      continue;
    }

    SResultRow *pResRow = getResultRow(pWindowResInfo, rowIdx);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t             fid = pQuery->pExpr1[col].base.functionId;
      SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResRow, col);

      bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
      bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pCell->complete = false;
      } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
        pCell->complete = true;
      }
    }
  }
}

3650 3651
/*
 * Prepare the result state of a query for the reverse scan.
 *
 * Group-by-normal-column and interval queries delegate to disableFuncInReverseScanImpl()
 * on the runtime window result info. For all other queries the same rule is applied
 * directly to the result info of each function context:
 *   first/first_dst with ascending order, last/last_dst with descending order -> complete = false
 *   ts and tag                                                                -> left unchanged
 *   every other function                                                      -> complete = true
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;
  int32_t           order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pEnv, &pEnv->windowResInfo, order);
    return;
  }

  // for simple result of table query,
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
    SQLFunctionCtx *pCtx = &pEnv->pCtx[col];

    // resultInfo is NULL, means no data checked in previous scan
    if (pCtx->resultInfo == NULL) {
      continue;
    }

    int32_t fid = pQuery->pExpr1[col].base.functionId;

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pCtx->resultInfo->complete = false;
    } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Turn the scan range of every table of every group around for the reverse scan, and
 * copy the resulting lastKey into the matching entry of pQInfo->tableGroupInfo.
 *
 * Both group lists are walked with the same (group, table) indexes; the entries are
 * asserted to refer to the same table.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pInfoGroup = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pInfoGroup);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pInfo = taosArrayGetP(pInfoGroup, k);
      updateTableQueryInfoForReverseScan(pQuery, pInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyGroup, k);
      pKeyInfo->lastKey = pInfo->lastKey;

      assert(pInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3701
/*
 * Flip the scan order recorded in every output function context by applying SWITCH_ORDER
 * to each pCtx[i].order.
 */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;
  int32_t         numOfCtx = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t idx = 0; idx < numOfCtx; ++idx) {
    SWITCH_ORDER(pCtxList[idx].order);
  }
}

3708
/*
 * Initialize a result row: point pCellInfo at the memory that immediately follows the
 * SResultRow header and mark the row as not yet bound to any page/row (-1).
 *
 * @return always TSDB_CODE_SUCCESS
 */
int32_t initResultRow(SResultRow *pResultRow) {
  // the cell info array starts right behind the row header
  pResultRow->pCellInfo = (SResultRowCellInfo*)(pResultRow + 1);
  pResultRow->rowId = -1;
  pResultRow->pageId = -1;

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3715
/*
 * Rebind every function context to the query's own output buffers (pQuery->sdata) and to the
 * cells of the result row stored under key 0, reset those cells, zero the output buffers and
 * finally run initCtxOutputBuf().
 */
void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t defaultKey = 0;
  int64_t uid = getResultInfoUId(pRuntimeEnv);
  SResultRow* pDefaultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&defaultKey,
                                                      sizeof(defaultKey), true, uid);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    // attach a freshly reset result cell; not every function needs the intermediate
    // buffer (e.g. COUNT/TAGPRJ/PRJ/TAG), but each one gets a cell
    SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pDefaultRow, col);
    RESET_RESULT_INFO(pCell);
    pCtx->resultInfo = pCell;

    // top/bottom/diff write their timestamps into the buffer of the first output column
    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    bool    writesTs = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (writesTs) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pExpr1[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output positions of all function contexts by `output` rows and reset their
 * result info. Must not be used when a DIFF function is present (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // move to the next output position for functions whose status asks for it
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * top/bottom assign the timestamp column themselves through ptsOutputBuf, so that
     * pointer has to be forwarded as well; diff is handled by the multi-output path.
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context and call the function's init() callback for
 * each context whose result info has not been initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         functionId = pQuery->pExpr1[col].base.functionId;

    pCtx->currentStage = 0;

    SResultRowCellInfo* pCell = GET_RES_INFO(pCtx);
    if (!pCell->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3790
/*
 * Apply the pending OFFSET (pQuery->limit.offset) to the rows currently held in the output
 * buffers.
 *
 * - Nothing happens when there are no rows or no offset left.
 * - If all buffered rows fall inside the offset, they are discarded, the offset is reduced by
 *   that amount, the output buffers are reset and the QUERY_RESBUF_FULL flag is cleared.
 * - Otherwise the first `offset` rows are dropped, the remaining rows are moved to the front
 *   of each output buffer and the contexts are repositioned behind them.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // the explicit casts make every variadic argument match its conversion specifier;
    // rows <= offset holds here, so the difference is never negative
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), (int64_t)pQuery->rec.rows,
        (uint64_t)(pQuery->limit.offset - pQuery->rec.rows));

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
  } else {
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    // rec.rows was strictly larger than the offset, so the remainder is positive
    qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, (uint64_t)pQuery->rec.rows);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pExpr1[i].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      // source and destination overlap, hence memmove
      memmove(pQuery->sdata[i]->data, (char*)pQuery->sdata[i]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
      pRuntimeEnv->pCtx[i].aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

      // these functions write timestamps through the buffer of the first output column,
      // which has already been repositioned at this point (column 0 is processed first)
      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
  }
}

/*
 * Update the query status flags.
 *
 * QUERY_NOT_COMPLETED replaces the whole status; any other value is OR-ed into the status
 * after the QUERY_NOT_COMPLETED flag has been cleared, because the two cannot coexist.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status == QUERY_NOT_COMPLETED) {
    pQuery->status = status;
    return;
  }

  CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
  pQuery->status |= status;
}

/*
 * Run xNextStep() of every output function (TSDB_FUNC_TS excluded) and report whether any of
 * them is still incomplete afterwards.
 *
 * For group-by-normal-column and interval queries this is done once per closed result row,
 * after binding the contexts to that row; otherwise it is done once on the current contexts.
 *
 * @return true if at least one function's result info is not complete
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    scanAgain = false;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pExpr1[col].base.functionId;
      if (functId != TSDB_FUNC_TS) {
        aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

        SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
        if (!pCell->complete) {
          scanAgain = true;
        }
      }
    }

    return scanAgain;
  }

  SResultRowInfo *pRowInfo = &pRuntimeEnv->windowResInfo;
  for (int32_t r = 0; r < pRowInfo->size; ++r) {
    SResultRow *pRow = getResultRow(pRowInfo, r);
    if (!pRow->closed) {
      continue;  // only closed rows are examined
    }

    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pExpr1[col].base.functionId;
      if (functId != TSDB_FUNC_TS) {
        aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

        SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
        if (!pCell->complete) {
          scanAgain = true;
        }
      }
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3886
/*
 * Take a snapshot of the current query position so it can be restored after repeat/reverse
 * scans: query status, current window index, the query window and the current table's window
 * (with its skey replaced by `start`). Fields not set here are zero-initialized.
 *
 * `start` must not lie beyond the table's lastKey in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  SQueryStatusInfo snapshot = {0};
  snapshot.status = pQuery->status;
  snapshot.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  snapshot.lastKey = start;

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pCurrent->win);
  snapshot.curWindow.skey = start;

  return snapshot;
}

3907 3908 3909 3910
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * Saves the TS buffer cursor into pStatus->cur, swaps the ends of the scanned window, flips
 * the query and context orders, flags the reverse scan, and replaces pSecQueryHandle with a
 * handle created for the reversed window. If that handle cannot be created the function
 * does not return: it longjmp()s to pRuntimeEnv->env with terrno.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // remember where the TS buffer stood, then turn it around and step to its next position
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // the reverse scan covers the window scanned so far, with start and end exchanged
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the swap the window has to be consistent with the new scan direction
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // release the previous secondary handle before a new one is created
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3949 3950
/*
 * Undo setEnvBeforeReverseScan(): flip the query/context orders back, restore the TS buffer
 * cursor, flag the master scan again and restore lastKey, status and the time windows from
 * the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // bring back the scan position and status that were active before the reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // the saved query window becomes both the table window and the query window again
  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3971 3972 3973 3974 3975 3976 3977
/*
 * Set lastKey of the single table in the group list to the start key of the query condition.
 * Only valid when the group info holds exactly one table (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3978
/*
 * Scan the data blocks of the current table starting from `start`.
 *
 * 1. Master scan via doScanAllDataBlocks().
 * 2. While needScanDataBlocksAgain() reports incomplete functions, rebuild the secondary
 *    query handle for the window covered so far and scan again (REPEAT_SCAN).
 * 3. If needReverseScan() says so, run one more scan in reverse order and restore the
 *    environment afterwards.
 *
 * The function longjmp()s to pRuntimeEnv->env when a query handle cannot be created (terrno)
 * or when the query has been killed (TSDB_CODE_TSC_QUERY_CANCELLED).
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the start position, needed by the repeat and reverse scans
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;

      if (saved.lastKey == pCurrent->lastKey) {
        // lastKey did not move, i.e. no data has been checked in this scan
        qDebug("QInfo:%p no results generated in this scan", pQInfo);
      } else {
        // the window actually covered ends one step before the new lastKey
        saved.curWindow.ekey = pCurrent->lastKey - direction;
      }

      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // a repeat scan changed the status, put the one of the master scan back
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    STsdbQueryCond cond = createTsdbQueryCond(pQuery, &saved.curWindow);
    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // give up before the next round if the query has been killed
    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
4053
/*
 * Call xFinalize() of every output function.
 *
 * Plain queries finalize the current contexts once. Group-by-normal-column and interval
 * queries finalize each closed result row (for group-by all rows are closed first) and store
 * the resulting row count in the row.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SResultRowInfo *pRowInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pRowInfo);
  }

  for (int32_t r = 0; r < pRowInfo->size; ++r) {
    SResultRow *pRow = pRowInfo->pResult[r];
    if (!isWindowResClosed(pRowInfo, r)) {
      continue;
    }

    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    // usually one row per group; top/bottom queries may produce more
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * @return true if at least one output column is produced by a function other than
 *         TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pExpr1[col].base.functionId;

    bool auxiliary = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
4101
/*
 * Initialize a STableQueryInfo inside the caller-provided memory `buf`.
 *
 * The window is set to `win`, lastKey to win.skey and the TS cursor's vgroupIndex to -1.
 * Interval and group-by-normal-column queries additionally get their windowResInfo
 * initialized; other queries leave it untouched.
 *
 * @return `buf` as STableQueryInfo, or NULL if initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    int32_t initialSize = 128;  // initial number of result rows for interval/groupby queries
    if (initWindowResInfo(&pInfo->windowResInfo, initialSize, TSDB_DATA_TYPE_INT) != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
4125
/*
 * Release the resources held inside a STableQueryInfo (its tag variant and windowResInfo).
 * The structure itself is not freed here. NULL is accepted and ignored.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
4137
 * @param pDataBlockInfo
4138
 */
H
Haojun Liao 已提交
4139
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
4140
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
4141
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
H
Haojun Liao 已提交
4142
  SResultRowInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
Haojun Liao 已提交
4143

H
Haojun Liao 已提交
4144 4145
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
4146 4147 4148 4149

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
4150

H
Haojun Liao 已提交
4151 4152 4153
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
4154

H
Haojun Liao 已提交
4155
  uint64_t uid = getResultInfoUId(pRuntimeEnv);
H
Haojun Liao 已提交
4156
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
4157
      sizeof(groupIndex), true, uid);
H
Haojun Liao 已提交
4158
  if (pResultRow == NULL) {
4159 4160
    return;
  }
4161

4162 4163 4164 4165
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
4166 4167
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
4168 4169 4170 4171
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
4172

H
Haojun Liao 已提交
4173 4174
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
4175
  setResultOutputBuf(pRuntimeEnv, pResultRow);
4176 4177 4178
  initCtxOutputBuf(pRuntimeEnv);
}

4179
/*
 * Point every function context at the storage of the given result row: the output position
 * inside the row's result buffer page and the row's result cell. For top/bottom/diff the
 * timestamp output is directed to the output buffer of the first column.
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    bool    writesTs = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (writesTs) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // every context gets its cell, even if the function (e.g. COUNT) does not need
    // an intermediate buffer
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);
  }
}

H
Haojun Liao 已提交
4202
/*
 * Bind the function contexts to the given result row and initialize the ones that have not
 * been initialized yet. Contexts whose cell is already initialized and complete only get
 * their resultInfo pointer updated; output position and stage are left as they are.
 */
void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx     *pCtx = &pRuntimeEnv->pCtx[col];
    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, col);

    pCtx->resultInfo = pCell;
    if (pCell->initialized && pCell->complete) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
    pCtx->currentStage = 0;

    // top/bottom/diff write timestamps through the buffer of the first output column
    int32_t functionId = pCtx->functionId;
    bool    writesTs = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (writesTs) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    if (!pCell->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

4230
/*
 * Set the tag values for the given table and, when a TS buffer is in use, position the
 * buffer for that table.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the start position is looked up by
 * vgId and tag value and the resulting cursor is stored in the table info; on later visits
 * the stored cursor is put back into the TS buffer.
 *
 * @return 0. NOTE(review): the lookup-failure path returns `false`, which is also 0, so
 *         callers cannot tell that failure apart from success by the return value - confirm
 *         whether that is intended.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // without a TS buffer there is no ts comp position to maintain
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  // both the master and supplement scan needs to set the correct ts comp start position
  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return false;
    }

    // keep the cursor of this table so later visits can continue from it
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  }

  // the same position report is emitted on both paths
  if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
4286
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
4287 4288
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
4289
  STableQueryInfo *pTableQueryInfo = pQuery->current;
4290

4291 4292 4293
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
4294
    pTableQueryInfo->win.skey = key;
4295
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
4296

4297 4298 4299 4300 4301
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
4302

4303 4304 4305 4306 4307 4308
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
4309
    STimeWindow     w = TSWINDOW_INITIALIZER;
H
Haojun Liao 已提交
4310
    SResultRowInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
4311

H
Haojun Liao 已提交
4312 4313
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
4314
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
4315
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
4316

4317 4318
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
4319
        assert(win.ekey == pQuery->window.ekey);
4320
      }
4321

4322
      pWindowResInfo->prevSKey = w.skey;
4323
    }
4324

4325
    pTableQueryInfo->queryRangeSet = 1;
4326
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
4327 4328 4329 4330
  }
}

/*
 * @return true if any output function carries the TSDB_FUNCSTATE_NEED_TS flag in its status
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) {
      return true;
    }

    col++;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is needed when
 * 1. the current table's lastKey or the query's end key lies inside the block's time window,
 *    because the precise position has to be located within the block, or
 * 2. one of the output functions requires timestamps (see requireTimestamp()).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo* pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

H
Haojun Liao 已提交
4354
/*
 * Copy rows of closed result rows into the query output buffers (pQuery->sdata) until either
 * all closed result rows have been consumed or the output capacity is reached.
 *
 * Progress is kept in pQInfo->groupIndex (number of result rows finished) and
 * pQInfo->groupResInfo.rowId (rows already taken from the result row in progress), so the
 * copy can be resumed by a later call.
 *
 * @param orderType TSDB_ORDER_ASC walks the result rows forward, any other value backward
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  int32_t copied = 0;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t      numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SResultRow** rows = pResultInfo->pResult;

  bool    ascending = (orderType == TSDB_ORDER_ASC);
  int32_t step = ascending ? 1 : -1;
  int32_t first = ascending ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  SGroupResInfo* pProgress = &pQInfo->groupResInfo;

  for (int32_t r = first; (r < numOfClosed) && (r >= 0); r += step) {
    // empty result rows are counted as finished and skipped
    if (rows[r]->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pProgress->rowId = 0;
      continue;
    }

    int32_t srcOffset = pProgress->rowId;
    int32_t toCopy = rows[r]->numOfRows - pProgress->rowId;

    if (toCopy > pQuery->rec.capacity - copied) {
      // not enough room for the rest of this result row: copy what fits and remember
      // where to continue
      toCopy = (int32_t) pQuery->rec.capacity - copied;
      pProgress->rowId += toCopy;
    } else {
      pProgress->rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, rows[r]->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pRuntimeEnv, col, rows[r], pPage);
      memcpy(dst, src + srcOffset * width, width * toCopy);
    }

    copied += toCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
H
Haojun Liao 已提交
4431
void copyFromWindowResToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo) {
4432
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4433

4434
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4435
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4436

4437
  pQuery->rec.rows += numOfResult;
4438

4439
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4440 4441
}

H
Haojun Liao 已提交
4442
/**
 * Raise the row count of every window result to the largest per-column result count.
 *
 * Interval queries are left untouched. For every other query, each result row stored in
 * pRuntimeEnv->windowResInfo gets numOfRows = max(numOfRows, numOfRes of each output cell). Output columns whose
 * function is TSDB_FUNC_TS, TSDB_FUNC_TAG or TSDB_FUNC_TAGPRJ do not take part in the maximum.
 *
 * @param pRuntimeEnv  runtime environment holding the window results and the per-column function contexts
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // only non-interval queries need the number of rows of each window result to be refreshed
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t i = 0; i < pRuntimeEnv->windowResInfo.size; ++i) {
    SResultRow *pResult = pRuntimeEnv->windowResInfo.pResult[i];

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functionId = pRuntimeEnv->pCtx[j].functionId;
      if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ) {
        continue;
      }

      SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pResult, j);
      pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
    }
  }
}

H
Haojun Liao 已提交
4465
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4466
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4467
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4468
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4469

H
Haojun Liao 已提交
4470
  SResultRowInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4471
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4472

H
Haojun Liao 已提交
4473
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4474
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4475
  } else {
4476
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4477
  }
H
Haojun Liao 已提交
4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    bool ascQuery = QUERY_IS_ASC_QUERY(pQuery);

    // TODO refactor
    if ((pTableQueryInfo->lastKey >= pTableQueryInfo->win.ekey && ascQuery) || (pTableQueryInfo->lastKey <= pTableQueryInfo->win.ekey && (!ascQuery))) {
      closeAllTimeWindow(pWindowResInfo);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      updateResultRowCurrentIndex(pWindowResInfo, pTableQueryInfo->lastKey, ascQuery);
    }
  }
4490 4491
}

H
Haojun Liao 已提交
4492
/**
 * Tell whether a table query still has results that have not been returned to the client.
 *
 * - Returns false as soon as a positive LIMIT has been reached (rec.total >= limit.limit).
 * - With fill enabled (and not a point interpolation query): returns true when the fill info still holds rows;
 *   otherwise, if the query status contains QUERY_COMPLETED, returns whether filling up to the end of the query
 *   window would still produce rows; otherwise returns false.
 * - Without fill: returns true only when the query status contains QUERY_COMPLETED, the query groups by a normal
 *   column or is an interval query, and at least one window result exists.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return true when more results are pending, false otherwise
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // There are results not returned to client yet, so filling applied to the remain result is required firstly.
    int32_t remain = taosNumOfRemainRows(pFillInfo);
    if (remain > 0) {
      return true;
    }

    /*
     * While the code reaches here, there are no results remaining now.
     * If the query is not completed yet, the gaps between two result blocks need to be handled after the next data
     * block is retrieved from TSDB.
     *
     * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the
     * result set is the FIRST result block, the gap between the start time of the query time window and the
     * timestamp of the first result row in the actual result set will fill nothing.
     */
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      int32_t numOfTotal = (int32_t)getNumOfResWithFill(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
      return numOfTotal > 0;
    }

  } else {
    // there are results waiting to be returned to the client.
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
        (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
        (pRuntimeEnv->windowResInfo.size > 0)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
4533 4534 4535 4536
/**
 * Number of columns in the final result of a query.
 *
 * @param pQuery  query descriptor
 * @return pQuery->numOfOutput when no second expression list (pExpr2) exists, pQuery->numOfExpr2 otherwise
 */
static int16_t getNumOfFinalResCol(SQuery* pQuery) {
  return pQuery->pExpr2 == NULL? pQuery->numOfOutput:pQuery->numOfExpr2;
}

4537
/**
 * Serialize the current query result into a response buffer and update the query status.
 *
 * Layout written to `data`, in this order:
 *   1. for every final output column, `bytes * numOfRows` bytes taken from pQuery->sdata[col]->data; the column
 *      widths come from pExpr2 when it exists and from pExpr1 otherwise;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer in network byte order;
 *   3. one STableIdInfo per entry, with uid/key converted by htobe64() and tid by htonl().
 *
 * Afterwards, if the query status contains QUERY_COMPLETED, the status is switched to QUERY_OVER when a super
 * table query is over (IS_STASBLE_QUERY_OVER) or when a table query has no remaining results.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy from each column
 * @param data       destination buffer; the function does not check its size, the caller must provide enough
 *                   room for all of the above
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // the final output columns are described by the second expression list when it exists
  SExprInfo *pExprs = (pQuery->pExpr2 == NULL) ? pQuery->pExpr1 : pQuery->pExpr2;
  int32_t numOfCols = (pQuery->pExpr2 == NULL) ? pQuery->numOfOutput : pQuery->numOfExpr2;

  for (int32_t col = 0; col < numOfCols; ++col) {
    int32_t bytes = pExprs[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t) taosHashGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)data = htonl(numOfTables);
  data += sizeof(int32_t);

  STableIdInfo* item = taosHashIterate(pQInfo->arrTableIdInfo, NULL);
  while(item) {
    STableIdInfo* pDst = (STableIdInfo*)data;
    pDst->uid = htobe64(item->uid);
    pDst->tid = htonl(item->tid);
    pDst->key = htobe64(item->key);

    data += sizeof(STableIdInfo);
    item = taosHashIterate(pQInfo->arrTableIdInfo, item);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4585
/**
 * Generate filled result rows and apply the remaining OFFSET to them.
 *
 * Each iteration asks the fill module for up to rec.capacity rows (written into pQuery->sdata) and then:
 *   - offset == 0:   returns the generated row count unchanged;
 *   - offset <  ret: drops the first `offset` rows by shifting every column of pDst, clears the offset and returns
 *                    the number of rows left;
 *   - offset >= ret: discards the whole batch (offset -= ret, rec.rows = 0) and tries again as long as
 *                    queryHasRemainResForTableQuery() reports pending results; returns 0 otherwise.
 *
 * NOTE(review): the fill output goes to pQuery->sdata while the shift is applied to pDst, and the shift uses
 * numOfOutput/pExpr1 column widths; presumably callers pass pQuery->sdata as pDst - confirm.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         column buffers on which the offset shift is performed
 * @param numOfFilled  not used by this function
 * @return number of rows available after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  while (1) {
    int32_t ret = (int32_t)taosFillResultDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position according to the offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= (int32_t)pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // move the rows that survive the offset to the beginning of each column buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pExpr1[i].bytes * pQuery->limit.offset,
                ret * pQuery->pExpr1[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    } else {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
          pQuery->limit.offset - ret);

      // the whole batch is consumed by the offset
      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;
      ret = 0;
    }

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return ret;
    }
  }
}

4629
/**
 * Finalize the cost summary of a query and write it to the debug log.
 *
 * Updates pRuntimeEnv->summary in place:
 *   - hashSize          = memory of the result-row hash table plus the table-query-info hash map;
 *   - elapsedTime      += firstStageMergeTime (so calling this more than once adds the merge time again);
 *   - winInfoSize / numOfTimeWindows are read from the result row pool.
 *
 * @param pQInfo  query handle
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo *pSummary = &pRuntimeEnv->summary;

  uint64_t hashSize = taosHashGetMemSize(pQInfo->runtimeEnv.pResultRowHashTable);
  hashSize += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pSummary->hashSize = hashSize;

  // add the merge time
  pSummary->elapsedTime += pSummary->firstStageMergeTime;

  SResultRowPool* p = pQInfo->runtimeEnv.pool;
  pSummary->winInfoSize = getResultRowPoolMemSize(p);
  pSummary->numOfTimeWindows = getNumOfAllocatedResultRows(p);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->firstStageMergeTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pSummary->winInfoSize/1024.0,
      pSummary->numOfTimeWindows, pSummary->tableInfoSize/1024.0, pSummary->hashSize/1024.0);
}

4653 4654
/**
 * Consume the remaining OFFSET inside the given data block and start processing at the first row after it.
 *
 * When the offset equals the number of rows in the block, the whole block is skipped: lastKey of the current
 * table is moved one step past the block boundary (ekey for ascending, skey for descending queries) and the
 * offset is cleared.
 *
 * Otherwise pQuery->pos is set to the first row that is not skipped (counted from the block start for ascending
 * and from the block end for descending queries), the block is loaded, lastKey is set to the timestamp of that
 * row, the offset is cleared and the query functions are applied to the block.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   description of the block in which the offset ends
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (pQuery->limit.offset == pBlockInfo->rows) {  // the current block is skipped completely
    pTableQueryInfo->lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey + step : pBlockInfo->window.skey + step;
    pQuery->limit.offset = 0;
    return;
  }

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = (int32_t)pQuery->limit.offset;
  } else {
    pQuery->pos = pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

  // the first column of the block is read as the timestamp column
  TSKEY *keys = (TSKEY *) pColInfoData->pData;

  // update the offset value
  pTableQueryInfo->lastKey = keys[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4688

4689 4690 4691 4692 4693
/**
 * Skip the rows covered by the OFFSET clause block by block.
 *
 * Nothing is done when there is no positive offset or when the query has filter columns (in that case the
 * number of qualifying rows per block is not known in advance).
 *
 * Blocks that are smaller than the remaining offset are skipped as a whole: the offset is reduced by the block
 * row count and lastKey of the current table is moved one step past the block. The first block that is not
 * smaller than the remaining offset is handed to updateOffsetVal() and iteration stops.
 *
 * Leaves via longjmp(pRuntimeEnv->env, ...) when the query has been killed (TSDB_CODE_TSC_QUERY_CANCELLED) or
 * when terrno is set after the block iteration.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo* pTableQueryInfo = pQuery->current;
  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    if (pQuery->limit.offset > blockInfo.rows) {
      // the whole block lies inside the offset: skip it without loading its data
      pQuery->limit.offset -= blockInfo.rows;
      pTableQueryInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
      pTableQueryInfo->lastKey += step;

      qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
             pQuery->limit.offset);
    } else {  // find the appropriate start position in the current block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4727

H
Haojun Liao 已提交
4728 4729
/**
 * Finish skipping time windows for an interval query once the OFFSET has been consumed.
 *
 * `win` is the last skipped window. If the window that follows it still overlaps the given data block (in scan
 * direction), the block is loaded, the first qualified window after `win` is located, the query start position
 * and start timestamp are moved to it, and the query functions are applied to the block; the timestamp of the
 * first row of that window is returned. Otherwise only the query start and prevSKey are moved to the start of
 * the next window, which is returned.
 *
 * The window index (windowResInfo.curIndex) is preserved across the call to tableApplyFunctionsOnBlock().
 *
 * @param pRuntimeEnv      runtime environment of the query
 * @param win              last time window that has been skipped
 * @param pBlockInfo       description of the current data block
 * @param pTableQueryInfo  per-table query state whose win.skey is reset
 * @return the new query start timestamp
 */
static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  assert(pQuery->limit.offset == 0);
  STimeWindow tw = *win;
  getNextTimeWindow(pQuery, &tw);

  if ((tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {

    // load the data block and check data remaining in current data block
    // TODO optimize performance
    SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
    SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

    tw = *win;
    int32_t startPos =
        getNextQualifiedWindow(pRuntimeEnv, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
    assert(startPos >= 0);

    // set the abort info
    pQuery->pos = startPos;

    // reset the query start timestamp
    pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
    pQuery->window.skey = pTableQueryInfo->win.skey;
    TSKEY key = pTableQueryInfo->win.skey;

    pWindowResInfo->prevSKey = tw.skey;
    int32_t index = pRuntimeEnv->windowResInfo.curIndex;

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
    pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
           GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
           pQuery->current->lastKey);

    return key;
  }

  // the next window starts in a later block: only move the query start, no data needs to be processed here
  pQuery->window.skey = tw.skey;
  pWindowResInfo->prevSKey = tw.skey;

  return tw.skey;
}

H
Haojun Liao 已提交
4778
/**
 * Skip the number of time windows given by the OFFSET clause of an interval query.
 *
 * First `*start` is initialised from the unclosed window results, or from lastKey of the current table when no
 * unclosed window exists. Nothing else is done when there is no positive offset, or when the query has filter
 * columns, a timestamp buffer or fill info.
 *
 * Otherwise data blocks are iterated and the offset is decremented once for every time window that ends inside
 * the visited blocks. As soon as the offset reaches zero, doSkipIntervalProcess() positions the query on the
 * next window and its result is stored in `*start`.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) when terrno is set after the block iteration.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        out: timestamp from which the query continues
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // get the first unclosed time window
  // NOTE(review): the loop does not stop at the first unclosed window, so *start ends up holding the start key
  // of the LAST unclosed window - confirm which one is intended.
  bool assign = false;
  for(int32_t i = 0; i < pRuntimeEnv->windowResInfo.size; ++i) {
    if (pRuntimeEnv->windowResInfo.pResult[i]->closed) {
      continue;
    }

    assign = true;
    *start = pRuntimeEnv->windowResInfo.pResult[i]->win.skey;
  }

  if (!assign) {
    *start = pQuery->current->lastKey;
  }

  assert(*start <= pQuery->current->lastKey);

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * 1. for interval without interpolation query we forward pQuery->interval.interval at a time for
   *    pQuery->limit.offset times. Since hole exists, pQuery->interval.interval*pQuery->limit.offset value is
   *    not valid. otherwise, we only forward pQuery->limit.offset number of points
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow w = TSWINDOW_INITIALIZER;

  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (QUERY_IS_ASC_QUERY(pQuery)) {
      // ascending: align the first window only once, on the first block
      if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
        pWindowResInfo->startTime = w.skey;
        pWindowResInfo->prevSKey = w.skey;
      }
    } else {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);

      pWindowResInfo->startTime = pQuery->window.skey;
      pWindowResInfo->prevSKey = w.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      STimeWindow tw = win;

      // the current window ends inside this block: it counts as one skipped window
      if ((win.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (win.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }

      // current window does not end in the current data block, try the next data block
      getNextTimeWindow(pQuery, &tw);
      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
        return true;
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

        if ((win.ekey > blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (win.ekey < blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
          pQuery->limit.offset -= 1;
        }

        if (pQuery->limit.offset == 0) {
          *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
          return true;
        } else {
          tw = win;
          int32_t startPos =
              getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
          assert(startPos >= 0);

          // set the abort info
          pQuery->pos = startPos;
          pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
          pWindowResInfo->prevSKey = tw.skey;
          win = tw;
        }
      } else {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4897 4898
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4899
/**
 * Create the TSDB query handle (pRuntimeEnv->pQueryHandle) for a query.
 *
 * No handle is created, and TSDB_CODE_SUCCESS is returned, when the query only reads tags, or when it is a
 * super table query that is neither an interval query nor a fixed-output query.
 *
 * For an ascending query on exactly one normal table that is not an interval, group-by-normal-column or
 * fixed-output query, the time window of that table replaces the query window in the TSDB condition.
 *
 * Handle type:
 *   - first/last row query: tsdbQueryLastRow(); the query window and the window/lastKey of every table are then
 *     replaced by the window the call stored in the condition, and the table count is reset when no table is left;
 *   - point interpolation query: tsdbQueryRowsInExternalWindow();
 *   - otherwise: tsdbQueryTables().
 *
 * @param tsdb           TSDB instance, passed to the tsdbQuery* functions
 * @param pQInfo         query handle
 * @param isSTableQuery  true for a super table query
 * @return terrno as left by the handle creation (reset to TSDB_CODE_SUCCESS right before it)
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  if (!isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv))
  ) {
    // single table: scan the time window recorded for that table instead of the query window
    SArray* pa = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pCheckInfo = taosArrayGetP(pa, 0);
    cond.twindow = pCheckInfo->win;
  }

  terrno = TSDB_CODE_SUCCESS;
  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

    // update the query time window
    pQuery->window = cond.twindow;

    if (pQInfo->tableGroupInfo.numOfTables == 0) {
      pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    } else {
      size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
      for (size_t i = 0; i < numOfGroups; ++i) {
        SArray *group = GET_TABLEGROUP(pQInfo, i);

        size_t t = taosArrayGetSize(group);
        for (size_t j = 0; j < t; ++j) {
          STableQueryInfo *pCheckInfo = taosArrayGetP(group, j);

          pCheckInfo->win = pQuery->window;
          pCheckInfo->lastKey = pCheckInfo->win.skey;
        }
      }
    }
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  }

  return terrno;
}

H
Haojun Liao 已提交
4957
/**
 * Build the column descriptors used by the fill module for the final result columns of a query.
 *
 * One zero-initialised SFillColInfo is allocated per final result column (see getNumOfFinalResCol()). Width,
 * type and function id are taken from pExpr2 when it exists and from pExpr1 otherwise; the offset of a column
 * is the sum of the widths of the columns before it; the fill value comes from pQuery->fillVal[i].
 *
 * @param pQuery  query descriptor
 * @return array allocated with calloc() (to be released by its owner), or NULL when the allocation fails
 */
static SFillColInfo* createFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = getNumOfFinalResCol(pQuery);
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  // TODO refactor
  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = (pQuery->pExpr2 == NULL)? &pQuery->pExpr1[i]:&pQuery->pExpr2[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = (int8_t)pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].tagIndex   = -2;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4984
/**
 * Initialise the runtime environment of a query before execution.
 *
 * Steps, in order:
 *   1. derive the topBotQuery / hasTagResults / timeWindowInterpo flags and apply the result-buffer scan limit;
 *   2. create the TSDB query handle (setupQueryHandle());
 *   3. record tsdb, vgId, the timestamp buffer and the remaining runtime flags; when a timestamp buffer is
 *      given, set its traverse order relative to the query order;
 *   4. create the disk-based result buffer and initialise the window result info:
 *        - super table query that reads data: 8 initial rows, keyed by the group-by column type or by INT
 *          (group id); skipped for interval queries;
 *        - group-by-normal-column, interval or normal table query: getInitialPageNum() rows, keyed by the
 *          group-by column type or by TIMESTAMP;
 *   5. set up the function contexts (setupQueryRuntimeEnv());
 *   6. when fill is requested (and this is not a point interpolation query), create the fill info aligned to
 *      the query time window;
 *   7. set the query status to QUERY_NOT_COMPLETED.
 *
 * Resources created before a failing step are not released here.
 *
 * @param pQInfo         query handle to initialise
 * @param pTsBuf         timestamp buffer, may be NULL
 * @param tsdb           TSDB instance
 * @param vgId           vnode group id, stored in pQInfo
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS, or the error code of the first failing step
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);
  pRuntimeEnv->timeWindowInterpo = timeWindowInterpoRequired(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the timestamp buffer forwards when it is stored in query order, backwards otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TENMB = 1024*1024*10;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TENMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, 8, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery) || (!isSTableQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TENMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // NOTE(review): createFillColInfo() returns NULL on allocation failure and the result is passed on
    // unchecked - confirm that taosInitFillInfo() tolerates a NULL column array.
    SFillColInfo* pColInfo = createFillColInfo(pQuery);
    STimeWindow w = TSWINDOW_INITIALIZER;

    // the query window may be given in descending order; align with its lower and upper bound
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    int32_t numOfCols = getNumOfFinalResCol(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, numOfCols,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo, pQInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

5084
/**
 * Clear the `complete` flag of the result info of every output column, so that the functions run again for the
 * next table. Columns without result info (GET_RES_INFO() yields NULL) are skipped.
 *
 * @param pRuntimeEnv  runtime environment holding the per-column function contexts
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pResInfo != NULL) {
      pResInfo->complete = false;
    }
  }
}

H
Haojun Liao 已提交
5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111
/**
 * Prepare the execution environment of a table before one of its data blocks is processed.
 *
 * Non-interval queries: the execution context is set for the group of the table, with the key one step past the
 * end key of the block. Interval queries: the interval query range is set from the start key of the block, and
 * the additional (tag / timestamp buffer) information is set when the query outputs tag values or uses a
 * timestamp buffer.
 *
 * @param pQInfo           query handle
 * @param pTableQueryInfo  per-table query state of the table that owns the block
 * @param pBlockInfo       description of the block about to be processed
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
  } else {  // interval query
    TSKEY nextKey = pBlockInfo->window.skey;
    setIntervalQueryRange(pQInfo, nextKey);

    if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }
  }
}

H
Haojun Liao 已提交
5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125
/**
 * Sanity check (assert only) of a table's query window against the query's
 * window.
 *
 * Ascending query:  table skey <= table ekey, lastKey >= table skey, and the
 *                   table window lies inside [query skey, query ekey].
 * Descending query: the same relations with every comparison mirrored.
 *
 * @param pQuery           the query whose window and order are checked against
 * @param pTableQueryInfo  per-table query state to validate
 */
static void doTableQueryInfoTimeWindowCheck(SQuery* pQuery, STableQueryInfo* pTableQueryInfo) {
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    assert(
        (pTableQueryInfo->win.skey >= pTableQueryInfo->win.ekey) &&
        (pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey) &&
        (pTableQueryInfo->win.skey <= pQuery->window.skey && pTableQueryInfo->win.ekey >= pQuery->window.ekey));
    return;
  }

  assert(
      (pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey) &&
      (pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey) &&
      (pTableQueryInfo->win.skey >= pQuery->window.skey && pTableQueryInfo->win.ekey <= pQuery->window.ekey));
}

H
Haojun Liao 已提交
5126
/**
 * Iterate over all data blocks delivered by the active query handle (the
 * primary handle during the master scan, the secondary handle otherwise) and
 * apply the query functions to every block that is not discarded.
 *
 * The function leaves via longjmp(pRuntimeEnv->env, ...) when the query has
 * been killed or when terrno is set after the scan loop.
 *
 * @param pQInfo  query info object
 * @return elapsed time of the scan, as the difference of two
 *         taosGetTimestampMs() readings
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // look up the per-table query state by the table id of the block
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    STableQueryInfo *pTableInfo = *ppTableInfo;
    pQuery->current = pTableInfo;
    doTableQueryInfoTimeWindowCheck(pQuery, pTableInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pTableInfo, &blockInfo);
    }

    uint32_t     status = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    // NOTE(review): a failure code returned here only ends the loop; it reaches the caller
    // solely through the terrno check below - confirm loadDataBlockOnDemand sets terrno.
    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                         &pStatis, &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped entirely: move lastKey past its boundary in scan direction
      pQuery->current->lastKey = (QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey) + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

5191 5192
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5193
  SQuery *          pQuery = pRuntimeEnv->pQuery;
5194

5195
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
5196
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
5197
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
5198

H
Haojun Liao 已提交
5199 5200 5201
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
5202

H
Haojun Liao 已提交
5203
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
5204
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
5205
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
5206

5207
  STsdbQueryCond cond = {
5208
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
5209 5210
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
5211
      .numOfCols = pQuery->numOfCols,
5212
  };
5213

H
hjxilinx 已提交
5214
  // todo refactor
5215
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
5216 5217 5218 5219
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
5220

5221
  taosArrayPush(g1, &tx);
5222
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
5223

5224
  // include only current table
5225 5226 5227 5228
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
5229

H
Haojun Liao 已提交
5230
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
5231 5232
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
5233 5234 5235
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
5236

5237
  if (pRuntimeEnv->pTSBuf != NULL) {
H
Haojun Liao 已提交
5238 5239
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

5240
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
5241
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
5242
      // failed to find data with the specified tag value and vnodeId
5243
      if (!tsBufIsValidElem(&elem)) {
H
Haojun Liao 已提交
5244 5245 5246 5247 5248 5249
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

5250
        return false;
H
Haojun Liao 已提交
5251 5252
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
5253 5254 5255 5256 5257 5258 5259 5260

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
5261 5262
      }
    } else {
H
Haojun Liao 已提交
5263
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
5264
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
5265

H
Haojun Liao 已提交
5266
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
5267
        // failed to find data with the specified tag value and vnodeId
5268
        if (!tsBufIsValidElem(&elem1)) {
H
Haojun Liao 已提交
5269 5270 5271 5272 5273
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
5274

H
Haojun Liao 已提交
5275
          return false;
H
Haojun Liao 已提交
5276 5277
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
5278 5279 5280 5281 5282
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
5283
        }
H
Haojun Liao 已提交
5284

H
Haojun Liao 已提交
5285 5286
      } else {
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
H
Haojun Liao 已提交
5287
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
5288 5289 5290 5291 5292
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
5293
      }
5294 5295
    }
  }
5296

5297
  initCtxOutputBuf(pRuntimeEnv);
5298 5299 5300
  return true;
}

H
Haojun Liao 已提交
5301
/**
 * Build a tsdb query condition from the query's column list, column count and
 * scan order, restricted to the given time window.
 *
 * @param pQuery  query supplying colList, numOfCols and order
 * @param win     time window copied into the condition
 * @return the populated condition (returned by value)
 */
STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win) {
  STsdbQueryCond cond = {0};

  cond.order     = pQuery->order.order;
  cond.numOfCols = pQuery->numOfCols;
  cond.colList   = pQuery->colList;

  TIME_WINDOW_COPY(cond.twindow, *win);
  return cond;
}

/**
 * Snapshot the identity (uid, tid) and the last scanned key of the table the
 * query is currently working on.
 *
 * @param pQuery  query object; both it and pQuery->current must be non-NULL
 * @return table id info for pQuery->current
 */
static STableIdInfo createTableIdInfo(SQuery* pQuery) {
  assert(pQuery != NULL && pQuery->current != NULL);

  STableQueryInfo* pCurrent = pQuery->current;
  STableId*        pId = TSDB_TABLEID(pCurrent->pTable);

  STableIdInfo tidInfo = {
      .uid = pId->uid,
      .tid = pId->tid,
      .key = pCurrent->lastKey,
  };

  return tidInfo;
}

/**
 * Record the last scanned key of the current table in the hash keyed by table
 * id: an existing entry gets its key refreshed, otherwise a new entry is put.
 *
 * @param pQuery        query whose current table is recorded
 * @param pTableIdInfo  hash object mapping tid -> STableIdInfo
 */
static void updateTableIdInfo(SQuery* pQuery, SHashObj* pTableIdInfo) {
  STableIdInfo tidInfo = createTableIdInfo(pQuery);

  STableIdInfo* idinfo = taosHashGet(pTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid));
  if (idinfo == NULL) {
    // first time this table is seen
    taosHashPut(pTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid), &tidInfo, sizeof(STableIdInfo));
    return;
  }

  assert(idinfo->tid == tidInfo.tid && idinfo->uid == tidInfo.uid);
  idinfo->key = tidInfo.key;
}

5336 5337 5338 5339 5340 5341 5342
/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
5343
static void sequentialTableProcess(SQInfo *pQInfo) {
5344
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5345
  SQuery *          pQuery = pRuntimeEnv->pQuery;
5346
  setQueryStatus(pQuery, QUERY_COMPLETED);
5347

H
Haojun Liao 已提交
5348
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
5349

5350
  if (isPointInterpoQuery(pQuery)) {
H
Haojun Liao 已提交
5351
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
5352
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
5353

5354
    while (pQInfo->groupIndex < numOfGroups) {
5355
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
H
Haojun Liao 已提交
5356

5357 5358
      qDebug("QInfo:%p point interpolation query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo,
             pQInfo->groupIndex, numOfGroups, group);
H
Haojun Liao 已提交
5359
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5360

H
Haojun Liao 已提交
5361 5362 5363
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
5364

H
Haojun Liao 已提交
5365 5366 5367 5368 5369 5370 5371
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
5372

H
Haojun Liao 已提交
5373
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
5374 5375 5376 5377 5378 5379

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
5380

H
Haojun Liao 已提交
5381
      initCtxOutputBuf(pRuntimeEnv);
5382

5383
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5384
      assert(taosArrayGetSize(s) >= 1);
5385

5386
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
5387
      taosArrayDestroy(s);
H
Haojun Liao 已提交
5388

H
Haojun Liao 已提交
5389
      // here we simply set the first table as current table
5390
      SArray *first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
5391 5392
      pQuery->current = taosArrayGetP(first, 0);

5393
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5394

H
Haojun Liao 已提交
5395 5396 5397 5398 5399
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
5400

H
Haojun Liao 已提交
5401 5402 5403 5404 5405
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5406 5407 5408 5409 5410 5411

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
5412
  } else if (pRuntimeEnv->groupbyNormalCol) {  // group-by on normal columns query
5413
    while (pQInfo->groupIndex < numOfGroups) {
5414
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
5415

5416 5417
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex,
             numOfGroups);
5418

H
Haojun Liao 已提交
5419
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5420

5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5433
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
5434
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
5435

B
Bomin Zhang 已提交
5436 5437
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5438 5439 5440
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5441

5442
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5443 5444
      assert(taosArrayGetSize(s) >= 1);

5445
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5446 5447 5448 5449 5450

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

H
Haojun Liao 已提交
5451
      SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
5452

5453
      // no results generated for current group, continue to try the next group
5454
      taosArrayDestroy(s);
5455 5456 5457 5458 5459
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5460
        pWindowResInfo->pResult[i]->closed = true;  // enable return all results for group by normal columns
5461

H
Haojun Liao 已提交
5462
        SResultRow *pResult = pWindowResInfo->pResult[i];
5463
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5464
          SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, j);
5465
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
5466 5467 5468
        }
      }

5469
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5470
             pQInfo->groupIndex);
5471 5472 5473 5474 5475 5476
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5477
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5478

5479
      pQInfo->groupIndex = currentGroupIndex;  // restore the group index
5480 5481 5482 5483
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5484
    }
H
Haojun Liao 已提交
5485
  } else if (pRuntimeEnv->queryWindowIdentical && pRuntimeEnv->pTSBuf == NULL && !isTSCompQuery(pQuery)) {
5486 5487 5488 5489 5490 5491 5492 5493 5494 5495
    //super table projection query with identical query time range for all tables.
    SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    SArray *group = GET_TABLEGROUP(pQInfo, 0);
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));

    void *pQueryHandle = pRuntimeEnv->pQueryHandle;
    if (pQueryHandle == NULL) {
H
Haojun Liao 已提交
5496
      STsdbQueryCond con = createTsdbQueryCond(pQuery, &pQuery->window);
5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &con, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
      pQueryHandle = pRuntimeEnv->pQueryHandle;
    }

    // skip blocks without load the actual data block from file if no filter condition present
    //    skipBlocks(&pQInfo->runtimeEnv);
    //    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    //      setQueryStatus(pQuery, QUERY_COMPLETED);
    //      return;
    //    }

H
Haojun Liao 已提交
5508 5509 5510 5511 5512 5513
    if (pQuery->prjInfo.vgroupLimit != -1) {
      assert(pQuery->limit.limit == -1 && pQuery->limit.offset == 0);
    } else if (pQuery->limit.limit != -1) {
      assert(pQuery->prjInfo.vgroupLimit == -1);
    }

5514
    bool hasMoreBlock = true;
H
Haojun Liao 已提交
5515
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
5516 5517 5518 5519
    SQueryCostInfo *summary = &pRuntimeEnv->summary;
    while ((hasMoreBlock = tsdbNextDataBlock(pQueryHandle)) == true) {
      summary->totalBlocks += 1;

5520
      if (isQueryKilled(pQInfo)) {
5521 5522 5523 5524 5525
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
      }

      tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
      STableQueryInfo **pTableQueryInfo =
H
Haojun Liao 已提交
5526
          (STableQueryInfo **) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
5527 5528 5529 5530 5531
      if (pTableQueryInfo == NULL) {
        break;
      }

      pQuery->current = *pTableQueryInfo;
H
Haojun Liao 已提交
5532
      doTableQueryInfoTimeWindowCheck(pQuery, *pTableQueryInfo);
5533 5534 5535 5536 5537

      if (pRuntimeEnv->hasTagResults) {
        setTagVal(pRuntimeEnv, pQuery->current->pTable, pQInfo->tsdb);
      }

H
Haojun Liao 已提交
5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->current->windowResInfo.size > pQuery->prjInfo.vgroupLimit) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }

      // it is a super table ordered projection query, check for the number of output for each vgroup
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->rec.rows >= pQuery->prjInfo.vgroupLimit) {
        if (QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.skey >= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        } else if (!QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.ekey <= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        }
      }

5557 5558
      uint32_t     status = 0;
      SDataStatis *pStatis = NULL;
5559
      SArray      *pDataBlock = NULL;
5560 5561 5562 5563 5564 5565 5566

      int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                          &pStatis, &pDataBlock, &status);
      if (ret != TSDB_CODE_SUCCESS) {
        break;
      }

H
Haojun Liao 已提交
5567 5568 5569 5570 5571
      if(status == BLK_DATA_DISCARD) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }
5572

H
Haojun Liao 已提交
5573
      ensureOutputBuffer(pRuntimeEnv, &blockInfo);
H
Haojun Liao 已提交
5574 5575
      int64_t prev = getNumOfResult(pRuntimeEnv);

5576 5577 5578 5579 5580 5581 5582 5583 5584 5585
      pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : blockInfo.rows - 1;
      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

      summary->totalRows += blockInfo.rows;
      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes,
             pQuery->current->lastKey);

      pQuery->rec.rows = getNumOfResult(pRuntimeEnv);

H
Haojun Liao 已提交
5586
      int64_t inc = pQuery->rec.rows - prev;
H
Haojun Liao 已提交
5587
      pQuery->current->windowResInfo.size += (int32_t) inc;
H
Haojun Liao 已提交
5588

5589 5590 5591 5592 5593
      // the flag may be set by tableApplyFunctionsOnBlock, clear it here
      CLEAR_QUERY_STATUS(pQuery, QUERY_COMPLETED);

      updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);

H
Haojun Liao 已提交
5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609
      if (pQuery->prjInfo.vgroupLimit >= 0) {
        if (((pQuery->rec.rows + pQuery->rec.total) < pQuery->prjInfo.vgroupLimit) || ((pQuery->rec.rows + pQuery->rec.total) > pQuery->prjInfo.vgroupLimit && prev < pQuery->prjInfo.vgroupLimit)) {
          if (QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts < blockInfo.window.ekey) {
            pQuery->prjInfo.ts = blockInfo.window.ekey;
          } else if (!QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts > blockInfo.window.skey) {
            pQuery->prjInfo.ts = blockInfo.window.skey;
          }
        }
      } else {
        // the limitation of output result is reached, set the query completed
        skipResults(pRuntimeEnv);
        if (limitResults(pRuntimeEnv)) {
          setQueryStatus(pQuery, QUERY_COMPLETED);
          SET_STABLE_QUERY_OVER(pQInfo);
          break;
        }
5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621
      }

      // while the output buffer is full or limit/offset is applied, query may be paused here
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL|QUERY_COMPLETED)) {
        break;
      }
    }

    if (!hasMoreBlock) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      SET_STABLE_QUERY_OVER(pQInfo);
    }
5622 5623
  } else {
    /*
5624 5625 5626
     * the following two cases handled here.
     * 1. ts-comp query, and 2. the super table projection query with different query time range for each table.
     * If the subgroup index is larger than 0, results generated by group by tbname,k is existed.
5627 5628
     * we need to return it to client in the first place.
     */
5629
    if (pQInfo->groupIndex > 0) {
5630
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5631
      pQuery->rec.total += pQuery->rec.rows;
5632

5633
      if (pQuery->rec.rows > 0) {
5634 5635 5636
        return;
      }
    }
5637

5638
    // all data have returned already
5639
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5640 5641
      return;
    }
5642

H
Haojun Liao 已提交
5643
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
5644
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5645

H
Haojun Liao 已提交
5646
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5647 5648
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5649

5650
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
5651
      if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5652
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5653
      }
5654

5655
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5656
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5657
        pQInfo->tableIndex++;
5658 5659
        continue;
      }
5660

H
hjxilinx 已提交
5661
      // TODO handle the limit offset problem
5662
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5663 5664
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5665 5666 5667
          continue;
        }
      }
5668

5669
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5670
      skipResults(pRuntimeEnv);
5671

5672
      // the limitation of output result is reached, set the query completed
5673
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5674
        SET_STABLE_QUERY_OVER(pQInfo);
5675 5676
        break;
      }
5677

5678 5679
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5680

5681
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5682 5683 5684 5685 5686 5687
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5688
        pQInfo->tableIndex++;
5689
        updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5690

5691
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5692
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5693 5694
          break;
        }
5695

H
Haojun Liao 已提交
5696 5697 5698 5699
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5700
      } else {
5701
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5702 5703
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5704 5705
          continue;
        } else {
5706 5707 5708
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5709 5710 5711
        }
      }
    }
H
Haojun Liao 已提交
5712

5713
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5714 5715
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5716

5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730
    /*
     * 1. super table projection query, group-by on normal columns query, ts-comp query
     * 2. point interpolation query, last row query
     *
     * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
     * since the finalize stage will be done at the client side.
     *
     * projection query, point interpolation query do not need the finalizer.
     *
     * Only the ts-comp query requires the finalizer function to be executed here.
     */
    if (isTSCompQuery(pQuery)) {
      finalizeQueryResult(pRuntimeEnv);
    }
5731

5732 5733 5734
    if (pRuntimeEnv->pTSBuf != NULL) {
      pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
    }
5735

5736 5737 5738 5739 5740
    qDebug("QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64
           " points returned, total:%" PRId64 ", offset:%" PRId64,
           pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows,
           pQuery->rec.total, pQuery->limit.offset);
  }
5741 5742
}

5743 5744 5745 5746
/**
 * Switch the runtime environment from the master scan to the reverse scan:
 * set the reverse-scan flag, swap the query window's start/end keys, flip the
 * query order (and the ts buffer cursor order when a ts buffer exists), flip
 * the context order, and open a fresh secondary query handle over all table
 * groups for the swapped window.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) when the secondary handle
 * cannot be created.
 *
 * @param pQInfo  query info object
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // built after the swap/flip above, so it describes the reversed scan
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    // do not keep a dangling handle around while the calls below run
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5774 5775 5776 5777
/**
 * Undo the changes made by doSaveContext() once the reverse scan is done:
 * swap the query window keys back, flip the query order and (when a ts buffer
 * exists) the ts buffer cursor order back, flip the context order back, and
 * mark the runtime environment as being in master scan again.
 *
 * @param pQInfo  query info object
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  // doSaveContext() flips the query order as well; flip it back so that the
  // order seen after the reverse scan matches the one before it
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pRuntimeEnv);
  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
}

5788 5789 5790
/**
 * Close every time window result after a scan has finished.
 *
 * Interval query: the window results of every table in every table group are
 * closed. Otherwise only the runtime environment's own window result info
 * (the group result) is closed.
 *
 * @param pQInfo  query info object
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(pTables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over several tables: forward (master) scan, optional reverse scan,
 * then move the accumulated results into the output buffer.
 *
 * If pQInfo->groupIndex > 0 no scan is started; the pending results are only copied
 * into the output buffer.
 *
 * The function leaves through longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED)
 * when the query has been killed or pQInfo->code is not TSDB_CODE_SUCCESS.
 * NOTE(review): the jump value is TSDB_CODE_TSC_QUERY_CANCELLED even when pQInfo->code
 * carries a different error -- confirm that the setjmp site does not lose that code.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    // groupIndex > 0: scanning is not repeated, only copy the data into the output buffer
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t masterElapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, masterElapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    int64_t reverseElapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, reverseElapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    // TODO finalizeQueryResult may cause SEGSEV, since the memory may not allocated yet, add a cleanup function instead
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  bool mergeGroupResult = QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery);
  if (!mergeGroupResult) {
    // not an interval query: results are taken from the window results of the runtime env
    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

H
Haojun Liao 已提交
5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897

static char *getArithemicInputSrc(void *param, const char *name, int32_t colId) {
  SArithmeticSupport *pSupport = (SArithmeticSupport *) param;
  SExprInfo* pExprInfo = (SExprInfo*) pSupport->exprList;

  int32_t index = -1;
  for (int32_t i = 0; i < pSupport->numOfCols; ++i) {
    if (colId == pExprInfo[i].base.resColId) {
      index = i;
      break;
    }
  }

  assert(index >= 0 && index < pSupport->numOfCols);
  return pSupport->data[index] + pSupport->offset * pExprInfo[index].bytes;
}

/*
 * Evaluate the second-stage expressions (pQuery->pExpr2) on the pQuery->rec.rows rows
 * currently stored in pQuery->sdata and write the results back into
 * pQuery->sdata[0 .. numOfExpr2 - 1]. Nothing is done when numOfExpr2 is 0.
 *
 * All results are first produced in temporary pages, because the first-stage columns in
 * pQuery->sdata serve as input while the expressions are evaluated.
 *
 * NOTE(review): the calloc/malloc results below are used without a NULL check.
 */
static void doSecondaryArithmeticProcess(SQuery* pQuery) {
  if (pQuery->numOfExpr2 == 0) {
    return;
  }

  int64_t numOfRows = pQuery->rec.rows;

  // input description: every first-stage output column is a potential operand
  SArithmeticSupport sup = {0};
  sup.offset    = 0;
  sup.exprList  = pQuery->pExpr1;
  sup.numOfCols = (int32_t)pQuery->numOfOutput;
  sup.data      = calloc(sup.numOfCols, POINTER_BYTES);
  for (int32_t k = 0; k < sup.numOfCols; ++k) {
    sup.data[k] = pQuery->sdata[k]->data;
  }

  // one temporary result page per second-stage expression
  tFilePage **pResPages = calloc(pQuery->numOfExpr2, POINTER_BYTES);
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    size_t payload = (size_t)(pQuery->pExpr2[i].bytes * numOfRows);
    pResPages[i] = (tFilePage *)malloc(payload + sizeof(tFilePage));
  }

  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    SExprInfo *pExpr = &pQuery->pExpr2[i];

    if (pExpr->base.functionId == TSDB_FUNC_ARITHM) {
      // calculate the result from several other columns
      sup.pArithExpr = pExpr;
      tExprTreeCalcTraverse(sup.pArithExpr->pExpr, (int32_t)numOfRows, pResPages[i]->data, &sup, TSDB_ORDER_ASC,
                            getArithemicInputSrc);
      continue;
    }

    // not an arithmetic expression: copy the first first-stage column that has the same
    // function id and source column id
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      SExprInfo *pSrc = &pQuery->pExpr1[j];
      if (pExpr->base.functionId != pSrc->base.functionId ||
          pExpr->base.colInfo.colId != pSrc->base.colInfo.colId) {
        continue;
      }

      memcpy(pResPages[i]->data, pQuery->sdata[j]->data, (size_t)(pSrc->bytes * numOfRows));
      break;
    }
  }

  // publish the results and release the temporary pages
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    memcpy(pQuery->sdata[i]->data, pResPages[i]->data, (size_t)(pQuery->pExpr2[i].bytes * numOfRows));
    tfree(pResPages[i]);
  }

  tfree(pResPages);
  tfree(sup.data);
}

5951 5952 5953 5954 5955 5956
/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5957
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5958
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5959

H
hjxilinx 已提交
5960
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5961
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5962 5963
    return;
  }
5964

H
hjxilinx 已提交
5965
  pQuery->current = pTableInfo;  // set current query table info
5966

5967
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5968
  finalizeQueryResult(pRuntimeEnv);
5969

H
Haojun Liao 已提交
5970 5971 5972 5973
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
  doSecondaryArithmeticProcess(pQuery);

5974
  if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5975
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5976
  }
5977

5978
  skipResults(pRuntimeEnv);
5979
  limitResults(pRuntimeEnv);
5980 5981
}

H
hjxilinx 已提交
5982
/*
 * Process a single-table query that may produce several output rows per call.
 *
 * The table is scanned repeatedly until at least one row is available or the query
 * status contains QUERY_COMPLETED. When the query is completed, the table id info of
 * the current table is stored in pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;

  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetDefaultResInfoOutputBuf(pEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(pEnv);
  if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  bool done = false;
  while (!done) {
    scanOneTableDataBlocks(pEnv, pQuery->current->lastKey);
    finalizeQueryResult(pEnv);

    pQuery->rec.rows = getNumOfResult(pEnv);
    if (pQuery->rec.rows > 0 && pQuery->numOfFilterCols > 0 && pQuery->limit.offset > 0) {
      skipResults(pEnv);
    }

    /*
     * 1. if pQuery->rec.rows == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->rec.rows > 0, pQuery->limit.offset must be 0
     */
    done = (pQuery->rec.rows > 0) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED);
    if (!done) {
      qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
             pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

      resetDefaultResInfoOutputBuf(pEnv);
    }
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    STableIdInfo tidInfo = createTableIdInfo(pQuery);
    taosHashPut(pQInfo->arrTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid), &tidInfo, sizeof(STableIdInfo));
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
6037
/*
 * Scan one table for an interval query, starting at 'start', and repeat the scan until
 * the query status contains QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * After every scan, when an offset is pending, no fill is requested and either filter
 * columns or a timestamp buffer are present, up to limit.offset closed time windows are
 * dropped from the front of the window results and the offset is reduced accordingly.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool offsetPending = (pQuery->limit.offset > 0) && (pQuery->fillType == TSDB_FILL_NONE);
    bool hasFilter = (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);

    if (hasFilter && offsetPending) {
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numToDrop = (int32_t)(MIN(numOfClosed, pQuery->limit.offset));

      clearFirstNWindowRes(pRuntimeEnv, numToDrop);
      pQuery->limit.offset -= numToDrop;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

6064
// handle time interval query on table
H
hjxilinx 已提交
6065
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
6066 6067
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
6068 6069
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
6070

H
Haojun Liao 已提交
6071
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
6072
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
6073

6074
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
6075 6076 6077 6078 6079 6080
  if (!pRuntimeEnv->groupbyNormalCol) {
    skipTimeInterval(pRuntimeEnv, &newStartKey);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      return;
    }
6081 6082
  }

6083
  while (1) {
H
Haojun Liao 已提交
6084
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
6085

H
Haojun Liao 已提交
6086
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
6087
      pQInfo->groupIndex = 0;  // always start from 0
6088
      pQuery->rec.rows = 0;
6089
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
6090

H
Haojun Liao 已提交
6091
      clearFirstNWindowRes(pRuntimeEnv, pQInfo->groupIndex);
6092
    }
6093

H
Haojun Liao 已提交
6094
    // no result generated, abort
H
Haojun Liao 已提交
6095
    if (pQuery->rec.rows == 0 || pRuntimeEnv->groupbyNormalCol) {
H
Haojun Liao 已提交
6096 6097 6098 6099 6100
      break;
    }

    doSecondaryArithmeticProcess(pQuery);
    
6101
    // the offset is handled at prepare stage if no interpolation involved
H
Haojun Liao 已提交
6102
    if (pQuery->fillType == TSDB_FILL_NONE) {
6103
      limitResults(pRuntimeEnv);
6104 6105
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
6106
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
H
Haojun Liao 已提交
6107
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (const tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
6108
      numOfFilled = 0;
6109

H
Haojun Liao 已提交
6110
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
6111
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
6112
        limitResults(pRuntimeEnv);
6113 6114
        break;
      }
6115

6116
      // no result generated yet, continue retrieve data
6117
      pQuery->rec.rows = 0;
6118 6119
    }
  }
6120

6121
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
6122
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
H
Haojun Liao 已提交
6123 6124
    // maxOutput <= 0, means current query does not generate any results
    int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
6125

H
Haojun Liao 已提交
6126 6127
    if ((pQuery->limit.offset > 0 && pQuery->limit.offset < numOfClosed) || pQuery->limit.offset == 0) {
      // skip offset result rows
H
Haojun Liao 已提交
6128
      clearFirstNWindowRes(pRuntimeEnv, (int32_t) pQuery->limit.offset);
H
Haojun Liao 已提交
6129 6130 6131 6132

      pQuery->rec.rows   = 0;
      pQInfo->groupIndex = 0;
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
H
Haojun Liao 已提交
6133
      clearFirstNWindowRes(pRuntimeEnv, pQInfo->groupIndex);
H
Haojun Liao 已提交
6134 6135 6136 6137

      doSecondaryArithmeticProcess(pQuery);
      limitResults(pRuntimeEnv);
    }
6138 6139 6140
  }
}

6141 6142 6143 6144
/*
 * Entry point for a query on exactly one table.
 *
 * Results left over from a previous call are returned first (remaining fill results or
 * remaining window results). Otherwise the query is dispatched to the interval, fixed
 * output or multi output processor, and the elapsed time is added to the summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;

  if (queryHasRemainResForTableQuery(pEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * There are remain results that are not returned due to result interpolation
       * So, we do keep in this procedure instead of launching retrieve procedure for next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    pQInfo->groupIndex = 0;  // always start from 0
    pQuery->rec.rows = 0;

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNWindowRes(pEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // there are not data remains
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startTs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableQueryInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pEnv->groupbyNormalCol) {
    // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTableQueryInfo);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTableQueryInfo);
  } else {
    // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQueryInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startTs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

6206
/*
 * Entry point for a query on a super table.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation nor
 * group-by-normal-column queries, go through multiTableQueryProcess; every other query
 * is handled by sequentialTableProcess. The elapsed time is added to the summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;

  pQuery->rec.rows = 0;
  int64_t startTs = taosGetTimestampUs();

  bool multiTableScan =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && (!pEnv->groupbyNormalCol));

  if (multiTableScan) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startTs);
}

6227
/*
 * Find the position of the column referenced by an expression.
 *
 * - tag column:           TSDB_TBNAME_COLUMN_INDEX when the column id equals that constant,
 *                         otherwise the index of the matching entry in pTagCols
 * - user defined column:  TSDB_UD_COLUMN_INDEX
 * - any other column:     the index of the matching entry in pQueryMsg->colList
 *
 * A column that cannot be found triggers assert(0); -1 is returned after it.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      if (pTagCols[i].colId == pExprMsg->colInfo.colId) {
        return i;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {
    // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t i = 0; i < pQueryMsg->numOfCols; ++i) {
      if (pQueryMsg->colList[i].colId == pExprMsg->colInfo.colId) {
        return i;
      }
    }
  }

  assert(0);
  return -1;
}

6258 6259 6260
/*
 * Check that the column referenced by an expression resolves to an index that is below
 * the number of normal columns or below the number of tag columns of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

6263
/*
 * Sanity check of the basic counters of a query message.
 *
 * Returns false, after logging the first offending value, when the interval is negative,
 * numOfTables is not positive, numOfGroupCols is negative, or numOfOutput is outside
 * [1, TSDB_MAX_COLUMNS]; returns true otherwise.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    return true;
  }

  return false;
}

/*
 * Validate the number of source columns (normal columns plus tags) of a query message.
 *
 * pQueryMsg  query message whose numOfCols/numOfTags/numOfOutput are already converted
 *            to host byte order
 * pExprMsg   array of pQueryMsg->numOfOutput output expressions
 *
 * Returns false when a counter is negative or the total exceeds TSDB_MAX_COLUMNS.
 * A query without any source column is only accepted if every output expression is
 * a tag projection, or a TID_TAG/COUNT function applied to the table name column.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  /*
   * Both counters are taken from the received message and have not been range checked
   * yet. Summing them in a 32-bit signed integer could overflow for a huge numOfTags
   * (undefined behaviour; in practice a negative total that passes the upper-bound
   * check), so the sum is computed in 64 bits.
   */
  int64_t numOfTotal = (int64_t)pQueryMsg->numOfCols + (int64_t)pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only expressions that do not need column data are allowed
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    bool onTbname = (pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed = (pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
                   (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                   (pFuncMsg->functionId == TSDB_FUNC_COUNT && onTbname);
    if (!allowed) {
      return false;
    }
  }

  return true;
}

6309
/*
 * Build the list of table id infos from the serialized query message.
 *
 * pQueryMsg     query message; numOfTables entries are read (asserted to be > 0)
 * pMsg          position in the message where the STableIdInfo entries start; the
 *               tid/uid/key fields of every entry are byte-swapped in place
 * pTableIdList  receives a newly created array holding a copy of every entry
 *
 * Returns the position in the message right behind the last entry.
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t i = 0; i < pQueryMsg->numOfTables; ++i, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
6327

6328
/**
H
hjxilinx 已提交
6329
 * pQueryMsg->head has been converted before this function is called.
6330
 *
H
hjxilinx 已提交
6331
 * @param pQueryMsg
6332 6333 6334 6335
 * @param pTableIdList
 * @param pExpr
 * @return
 */
H
Haojun Liao 已提交
6336
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr, SSqlFuncMsg ***pSecStageExpr,
weixin_48148422's avatar
weixin_48148422 已提交
6337
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
6338 6339
  int32_t code = TSDB_CODE_SUCCESS;

6340 6341 6342 6343
  if (taosCheckVersion(pQueryMsg->version, version, 3) != 0) {
    return TSDB_CODE_QRY_INVALID_MSG;
  }

6344 6345 6346 6347
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
6348 6349 6350
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
6351 6352
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
Haojun Liao 已提交
6353
  pQueryMsg->vgroupLimit = htobe64(pQueryMsg->vgroupLimit);
H
hjxilinx 已提交
6354

6355 6356
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
6357
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
6358
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
6359 6360

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
6361
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
6362
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
6363 6364 6365
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
6366
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
6367
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
6368
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6369
  pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput);
6370

6371
  // query msg safety check
6372
  if (!validateQueryMsg(pQueryMsg)) {
6373 6374
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
6375 6376
  }

H
hjxilinx 已提交
6377 6378
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
6379 6380
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
6381
    pColInfo->colId = htons(pColInfo->colId);
6382
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
6383 6384
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
6385

H
hjxilinx 已提交
6386
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
6387

H
hjxilinx 已提交
6388
    int32_t numOfFilters = pColInfo->numOfFilters;
6389
    if (numOfFilters > 0) {
H
hjxilinx 已提交
6390
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
6391 6392 6393 6394
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
6395 6396 6397
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
6398
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
6399

6400 6401
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
6402 6403 6404

      pMsg += sizeof(SColumnFilterInfo);

6405 6406
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
6407

6408
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
6409 6410 6411 6412 6413
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

6414
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
6415
        pMsg += (pColFilter->len + 1);
6416
      } else {
6417 6418
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
6419 6420
      }

6421 6422
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
6423 6424 6425
    }
  }

6426
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
6427 6428 6429 6430 6431
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

6432
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
6433

6434
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6435
    (*pExpr)[i] = pExprMsg;
6436

6437
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
6438
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
H
Haojun Liao 已提交
6439 6440 6441 6442
    pExprMsg->colInfo.flag  = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId    = htons(pExprMsg->functionId);
    pExprMsg->numOfParams   = htons(pExprMsg->numOfParams);
    pExprMsg->resColId      = htons(pExprMsg->resColId);
6443

6444
    pMsg += sizeof(SSqlFuncMsg);
6445 6446

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
6447
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
6448 6449 6450 6451
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
6452
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
6453 6454 6455 6456 6457
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
6458 6459
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
6460
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
6461 6462
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
6463 6464
      }
    } else {
6465
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
6466
//        return TSDB_CODE_QRY_INVALID_MSG;
6467
//      }
6468 6469
    }

6470
    pExprMsg = (SSqlFuncMsg *)pMsg;
6471
  }
6472

6473
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
6474
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
6475
    goto _cleanup;
6476
  }
6477

H
Haojun Liao 已提交
6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520
  if (pQueryMsg->secondStageOutput) {
    pExprMsg = (SSqlFuncMsg *)pMsg;
    *pSecStageExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES);
    
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      (*pSecStageExpr)[i] = pExprMsg;

      pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
      pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
      pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
      pExprMsg->functionId = htons(pExprMsg->functionId);
      pExprMsg->numOfParams = htons(pExprMsg->numOfParams);

      pMsg += sizeof(SSqlFuncMsg);

      for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
        pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
        pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

        if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
          pExprMsg->arg[j].argValue.pz = pMsg;
          pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
        } else {
          pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
        }
      }

      int16_t functionId = pExprMsg->functionId;
      if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
        if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
          code = TSDB_CODE_QRY_INVALID_MSG;
          goto _cleanup;
        }
      } else {
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_QRY_INVALID_MSG;
//      }
      }

      pExprMsg = (SSqlFuncMsg *)pMsg;
    }
  }

H
hjxilinx 已提交
6521
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
6522

H
hjxilinx 已提交
6523
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
6524
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
6525 6526 6527 6528
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
6529 6530 6531

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
6532
      pMsg += sizeof((*groupbyCols)[i].colId);
6533 6534

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
6535 6536
      pMsg += sizeof((*groupbyCols)[i].colIndex);

6537
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
6538 6539 6540 6541 6542
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
6543

H
hjxilinx 已提交
6544 6545
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
6546 6547
  }

6548 6549
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
6550
    pQueryMsg->fillVal = (uint64_t)(pMsg);
6551 6552

    int64_t *v = (int64_t *)pMsg;
6553
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6554 6555
      v[i] = htobe64(v[i]);
    }
6556

6557
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
6558
  }
6559

6560 6561
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6562 6563 6564 6565 6566
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

6567 6568
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
6569

6570 6571 6572 6573
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
6574

6575
      (*tagCols)[i] = *pTagCol;
6576
      pMsg += sizeof(SColumnInfo);
6577
    }
H
hjxilinx 已提交
6578
  }
6579

6580 6581 6582
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
6583 6584 6585 6586 6587 6588

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
6589 6590 6591
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
6592

weixin_48148422's avatar
weixin_48148422 已提交
6593
  if (*pMsg != 0) {
6594
    size_t len = strlen(pMsg) + 1;
6595

6596
    *tbnameCond = malloc(len);
6597 6598 6599 6600 6601
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
6602
    strcpy(*tbnameCond, pMsg);
6603
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
6604
  }
6605

6606
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
6607 6608
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
6609
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
6610
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
6611 6612

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
6613 6614

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
6615
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
6616 6617
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
6618 6619 6620 6621
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
6622 6623

  return code;
6624 6625
}

H
Haojun Liao 已提交
6626 6627
/*
 * Rebuild the expression tree of an arithmetic output column from the binary
 * buffer carried in the first argument of its function message, and attach the
 * tree to pArithExprInfo->pExpr.
 *
 * Returns TSDB_CODE_SUCCESS when a tree was attached, the code delivered to the
 * CATCH handler when exprTreeFromBinary() raises, or TSDB_CODE_QRY_APP_ERROR
 * when it returns NULL without raising.
 */
static int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary", pQueryMsg);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    // run the registered cleanup actions before reporting the failure
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  // no exception was raised, but no tree was produced either
  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
6647
static int32_t createQueryFuncExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
6648 6649
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
6650
  int32_t code = TSDB_CODE_SUCCESS;
6651

H
Haojun Liao 已提交
6652
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
6653
  if (pExprs == NULL) {
6654
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
6655 6656 6657 6658 6659
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

H
Haojun Liao 已提交
6660
  for (int32_t i = 0; i < numOfOutput; ++i) {
6661
    pExprs[i].base = *pExprMsg[i];
6662
    pExprs[i].bytes = 0;
6663 6664 6665 6666

    int16_t type = 0;
    int16_t bytes = 0;

6667
    // parse the arithmetic expression
6668
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
6669
      code = buildArithmeticExprFromMsg(&pExprs[i], pQueryMsg);
6670

6671
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6672
        tfree(pExprs);
6673
        return code;
6674 6675
      }

6676
      type  = TSDB_DATA_TYPE_DOUBLE;
6677
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
6678
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
6679
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6680
      type = s.type;
H
Haojun Liao 已提交
6681
      bytes = s.bytes;
6682 6683
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
6684 6685
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6686 6687
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6688 6689 6690 6691 6692

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6693
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
6694
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6695

dengyihao's avatar
dengyihao 已提交
6696
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6697 6698 6699 6700
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6701
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6702

H
Haojun Liao 已提交
6703 6704 6705
        type  = s.type;
        bytes = s.bytes;
      }
6706 6707
    }

S
TD-1057  
Shengliang Guan 已提交
6708
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6709
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6710
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6711
      tfree(pExprs);
6712
      return TSDB_CODE_QRY_INVALID_MSG;
6713 6714
    }

6715
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6716
      tagLen += pExprs[i].bytes;
6717
    }
6718
    assert(isValidDataType(pExprs[i].type));
6719 6720 6721
  }

  // TODO refactor
H
Haojun Liao 已提交
6722
  for (int32_t i = 0; i < numOfOutput; ++i) {
6723 6724
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6725

6726
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6727
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6728 6729 6730 6731 6732 6733 6734 6735 6736
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6737 6738 6739
    }
  }

6740
  *pExprInfo = pExprs;
6741 6742 6743
  return TSDB_CODE_SUCCESS;
}

6744
/*
 * Create the group-by descriptor for a query.
 *
 * pQueryMsg  query message; supplies numOfGroupCols, orderType and orderByIdx
 * pColIndex  array of numOfGroupCols column indexes to copy into the descriptor
 * code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *            left untouched otherwise
 *
 * Returns NULL when the query has no group-by columns or on allocation failure,
 * otherwise a heap-allocated SSqlGroupbyExpr owned by the caller.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not hand back a descriptor without its column list
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6768
/*
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo per column of
 * pQuery->colList that carries at least one filter, each with an array of
 * SColumnFilterElem whose fp member is the comparison callback selected from
 * the column type and the lower/upper relation operators.
 *
 * pQInfo is only used as an identifier in log messages.
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG.
 * On failure the partially built filter array is left attached to pQuery; it is
 * zero-initialized, so unfilled entries hold NULL pointers and zero counts.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array, so that code iterating
    // over numOfFilterCols entries never touches a NULL pFilterInfo
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];
    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: the table above only guarantees that ONE of the two tables exists
        if (rangeFilterArray == NULL) {
          qError("QInfo:%p failed to get range filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          pSingleColFilter->fp = (upper == TSDB_RELATION_LESS_EQUAL)? rangeFilterArray[4]:rangeFilterArray[2];
        } else {
          pSingleColFilter->fp = (upper == TSDB_RELATION_LESS_EQUAL)? rangeFilterArray[3]:rangeFilterArray[1];
        }
      } else {  // set callback filter function
        if (filterArray == NULL) {
          qError("QInfo:%p failed to get value filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      if (pSingleColFilter->fp == NULL) {
        // the relation operator has no callback for this data type
        qError("QInfo:%p no filter function for data type:%d, lower:%d, upper:%d", pQInfo, type, lower, upper);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6861
/*
 * Rewrite colInfo.colIndex of every first-stage output expression so that it is
 * the position of the referenced column inside pQuery->colList (normal columns)
 * or pQuery->tagColList (tag columns). Arithmetic expressions and user-defined
 * constant columns are left untouched.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pExpr1 != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pExpr1[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column must be present in the source column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is the only one allowed to be absent from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6898 6899
// Forward declaration: createQInfoImpl() releases a partially built SQInfo through
// freeQInfo(), whose definition appears later in this file.
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6900 6901 6902
/*
 * Choose the capacity (in rows) of the result buffer and the threshold at which
 * it is considered full, and store both in pQuery->rec.
 *
 * Projection queries get as many rows as fit in RESULT_MSG_MIN_SIZE bytes, but
 * never fewer than RESULT_MSG_MIN_ROWS; every other query uses a fixed 4096 rows.
 * The threshold is RESULT_THRESHOLD_RATIO of the capacity.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  // in case of non-prj query, a smaller output buffer will be used.
  int32_t rows = 4096;

  if (isProjQuery(pQuery)) {
    rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    rows = (rows < RESULT_MSG_MIN_ROWS)? RESULT_MSG_MIN_ROWS:rows;
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

6919
/*
 * Allocate and populate the SQInfo/SQuery pair that drives the execution of one query.
 *
 * pQueryMsg        decoded query message (columns, limits, order, window, fill values ...)
 * pGroupbyExpr     group-by descriptor, may be NULL
 * pExprs           first-stage output expressions, pQueryMsg->numOfOutput entries
 * pSecExprs        second-stage output expressions, may be NULL
 * pTableGroupInfo  groups of tables to query; copied by value into the SQInfo
 * pTagCols         tag column descriptors
 * stableQuery      forwarded to changeExecuteScanOrder()
 *
 * Returns the new SQInfo, or NULL on failure. On failure the objects handed in are
 * released here: directly on the two early paths, and through freeQInfo() once they
 * have been attached to the SQuery.
 *
 * NOTE(review): on the early `_cleanup_query` path (SQuery allocation failed) pSecExprs
 * has not been attached to anything and is not released here - confirm that the caller
 * still owns it in that case.
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pExpr1          = pExprs;
  pQuery->pExpr2          = pSecExprs;
  pQuery->numOfExpr2      = pQueryMsg->secondStageOutput;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;
  pQuery->prjInfo.vgroupLimit = pQueryMsg->vgroupLimit;
  pQuery->prjInfo.ts      = (pQueryMsg->order == TSDB_ORDER_ASC)? INT64_MIN:INT64_MAX;

  // colList holds SColumnInfo elements, so size the array by its own element type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  // copy the column descriptors, give each one its own copy of the filters, and
  // sum up the width of one source row
  int32_t srcSize = 0;
  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
    srcSize += pQuery->colList[i].bytes;
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    // TODO refactor
    int16_t bytes = 0;
    if (pQuery->pExpr2 == NULL || col >= pQuery->numOfExpr2) {
      // pExpr2 has numOfExpr2 valid entries: index numOfExpr2 itself is already out of range
      bytes = pExprs[col].bytes;
    } else {
      bytes = MAX(pQuery->pExpr2[col].bytes, pExprs[col].bytes);
    }

    size_t size = (size_t)((pQuery->rec.capacity + 1) * bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  if (pQInfo->runtimeEnv.keyBuf == NULL) {
    goto _cleanup;
  }

  pQInfo->runtimeEnv.pool = initResultRowPool(getWindowResultSize(&pQInfo->runtimeEnv));

  // prevRow is a single block: numOfCols slot pointers followed by srcSize bytes of data,
  // each slot pointing at the area reserved for the matching column
  size_t prevRowSize = POINTER_BYTES * pQuery->numOfCols + srcSize;
  pQInfo->runtimeEnv.prevRow = malloc(prevRowSize);
  if (pQInfo->runtimeEnv.prevRow == NULL && prevRowSize > 0) {
    goto _cleanup;
  }

  if (pQuery->numOfCols > 0) {  // without columns there is no slot 0 to write to
    char* start = POINTER_BYTES * pQuery->numOfCols + (char*) pQInfo->runtimeEnv.prevRow;
    pQInfo->runtimeEnv.prevRow[0] = start;

    for(int32_t i = 1; i < pQuery->numOfCols; ++i) {
      pQInfo->runtimeEnv.prevRow[i] = pQInfo->runtimeEnv.prevRow[i - 1] + pQuery->colList[i-1].bytes;
    }
  }

  // one contiguous buffer that backs every STableQueryInfo created below
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  pQInfo->runtimeEnv.queryWindowIdentical = true;
  STimeWindow window = pQuery->window;

  // mirror the table groups: one array of STableQueryInfo* per input group, plus a
  // tid -> STableQueryInfo* hash entry for every table
  int32_t index = 0;
  for(int32_t i = 0; i < (int32_t) numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < (int32_t) s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      // every table starts at its own last key; remember whether any of them differs
      // from the start of the query window
      window.skey = info->lastKey;
      if (info->lastKey != pQuery->window.skey) {
        pQInfo->runtimeEnv.queryWindowIdentical = false;
      }

      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // the SQInfo does not exist: the table group was never copied into it
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // the SQuery does not exist: nothing was attached to it, release the inputs directly
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
7125
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
7126 7127 7128 7129
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
7130

H
hjxilinx 已提交
7131 7132 7133 7134
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
7135
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
7136 7137 7138
  return (sig == (uint64_t)pQInfo);
}

7139
/*
 * Second initialization step of a query, run against an already created SQInfo.
 *
 * pQueryMsg  query message; tsLen/tsOffset/tsNumOfBlocks/tsOrder describe an optional
 *            block of compressed timestamps embedded in the message
 * tsdb       storage handle, used for the time precision and passed to doInitQInfo()
 * vgId       vnode id, forwarded to the timestamp buffer and doInitQInfo()
 * pQInfo     the query to initialize
 * isSTable   forwarded to doInitQInfo()
 *
 * Returns TSDB_CODE_SUCCESS - also when the query is completed immediately because
 * the time range is empty or no table qualifies - or the error of doInitQInfo(),
 * in which case pQInfo has been released through freeQInfo().
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  // the window is empty with respect to the scan direction: nothing to do
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The timestamp buffer is only consumed by doInitQInfo(), so it is created after the
  // early exits above; creating it earlier would leave it unreferenced on those paths.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    // position the buffer on its first element
    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);

    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
7184
/*
 * Release an array of column filters: for every entry whose filterstr flag is set
 * the buffer referenced by pz is freed, then the array itself.
 * Nothing is done when pFilter is NULL or numOfFilters is 0.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    int32_t idx = 0;

    while (idx < numOfFilters) {
      SColumnFilterInfo *pCur = &pFilter[idx];
      if (pCur->filterstr) {
        free((void*)(pCur->pz));
      }

      idx += 1;
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
7198 7199
/*
 * Tear down a group list of STableQueryInfo pointers: every item of every group is
 * passed to destroyTableQueryInfoImpl(), each per-group array is destroyed, and
 * finally the outer list and the hash map are destroyed and the structure is reset
 * (NULL pointers, zero tables).
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  if (pTableqinfoGroupInfo->pGroupList != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pTableqinfoGroupInfo->pGroupList);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pGroup = taosArrayGetP(pTableqinfoGroupInfo->pGroupList, g);
      size_t  tablesInGroup = taosArrayGetSize(pGroup);

      for (size_t t = 0; t < tablesInGroup; ++t) {
        STableQueryInfo* pInfo = taosArrayGetP(pGroup, t);
        destroyTableQueryInfoImpl(pInfo);
      }

      taosArrayDestroy(pGroup);
    }
  }

  // these two calls are made even when the group list is NULL
  taosArrayDestroy(pTableqinfoGroupInfo->pGroupList);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
Haojun Liao 已提交
7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237
/*
 * Destroy an array of numOfExpr output expressions: the expression tree of every
 * entry that has one is passed to tExprNodeDestroy(), then the array is freed.
 * A NULL array is accepted only together with numOfExpr == 0 (asserted).
 *
 * Always returns NULL, so the result can be assigned back to the owning pointer.
 */
static void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr) {
  if (pExprInfo != NULL) {
    int32_t idx = 0;

    while (idx < numOfExpr) {
      SExprInfo *pCur = &pExprInfo[idx];
      if (pCur->pExpr != NULL) {
        tExprNodeDestroy(pCur->pExpr, NULL);
      }

      idx += 1;
    }

    tfree(pExprInfo);
    return NULL;
  }

  assert(numOfExpr == 0);
  return NULL;
}

H
hjxilinx 已提交
7238 7239 7240 7241
/*
 * Tear down a query handle. Handles rejected by isValidQInfo() are ignored.
 *
 * Order of work: give the query buffer quota back via releaseQueryBuf(), tear
 * down the runtime environment, release everything reachable from the SQuery
 * object (result buffers, fill values, filters, expressions, column lists,
 * group-by info), then the table group information and finally the SQInfo
 * itself. The signature is cleared just before the handle is freed.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    // one result buffer per output column, then the table of buffer pointers
    if (pQuery->sdata != NULL) {
      int32_t col = 0;
      while (col < pQuery->numOfOutput) {
        tfree(pQuery->sdata[col]);
        col += 1;
      }

      tfree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      tfree(pQuery->fillVal);
    }

    // filter descriptors of each filtered column; the owning array is freed further below
    for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
      SSingleColumnFilterInfo *pSingleFilter = &pQuery->pFilterInfo[f];
      if (pSingleFilter->numOfFilters > 0) {
        tfree(pSingleFilter->pFilters);
      }
    }

    pQuery->pExpr1 = destroyQueryFuncExpr(pQuery->pExpr1, pQuery->numOfOutput);
    pQuery->pExpr2 = destroyQueryFuncExpr(pQuery->pExpr2, pQuery->numOfExpr2);

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      int32_t c = 0;
      while (c < pQuery->numOfCols) {
        SColumnInfo *pCol = &pQuery->colList[c];
        freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
        c += 1;
      }

      tfree(pQuery->colList);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosHashCleanup(pQInfo->arrTableIdInfo);

  // clear the signature before the memory is released
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
7304
/*
 * Compute the number of payload bytes for the current result.
 *
 * For a ts-comp query with a positive row count, the result is the size of the
 * file named by the first output buffer, and *numOfRows is overwritten with
 * that file size. If stat() fails, the error is logged and 0 is returned.
 * In every other case the result is rowSize * (*numOfRows).
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
7325

H
hjxilinx 已提交
7326 7327 7328
/*
 * Copy the current result set into the response buffer `data` and update the
 * running totals of the query.
 *
 * For a ts-comp query the result lives in a temporary file named by the first
 * output buffer: the file is read into `data` in one go, then closed and
 * unlinked. For every other query the rows are copied by
 * doCopyQueryResultToMsg().
 *
 * The file size and the number of bytes read are kept in signed variables so
 * that a -1 returned by lseek()/read() is recognised as a failure instead of
 * being turned into a huge unsigned value.
 *
 * Always returns TSDB_CODE_SUCCESS; I/O failures are only logged.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int64_t s = lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, s);
      if (s < 0) {
        // size unknown: do not attempt to read with a bogus length
        qError("QInfo:%p failed to get size of tmp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else if (lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t sz = read(fd, data, (uint32_t) s);
        if (sz < s) {  // todo handle error
          qError("QInfo:%p failed to read ts-comp data from tmp file, path:%s, expect:%"PRId64", read:%"PRId64", reason:%s",
                 pQInfo, pQuery->sdata[0]->data, s, sz, (sz < 0) ? strerror(errno) : "short read");
          assert(0);
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  // stop the query once the total number of returned rows hits the limit
  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

7378 7379 7380 7381 7382 7383 7384
/* Bookkeeping for the query handles registered under one vgId. */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // id passed to qOpenQueryMgmt(), used in log messages
  bool            closed;         // set by qQueryMgmtNotifyClosed(); registration is refused afterwards
  pthread_mutex_t lock;           // initialised in qOpenQueryMgmt(), destroyed in qCleanupQueryMgmt()
} SQueryMgmt;

7385
/*
 * Build a query handle from a query message.
 *
 * Steps: decode the message, validate the table list, build the first- and
 * (optional) second-stage expressions and the group-by expression, resolve the
 * set of tables to scan, reserve query buffer quota, create the SQInfo and
 * initialise it.
 *
 * @param tsdb       storage handle passed through to the tsdb* calls (must not be NULL)
 * @param vgId       id forwarded to initQInfo()
 * @param pQueryMsg  query message (must not be NULL)
 * @param pQInfo     receives the new handle; set to NULL when a non-success code is returned
 * @return TSDB_CODE_SUCCESS or the error code of the failing step
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  // everything below is released at _over unless ownership was handed to the new SQInfo
  char            *tagCond        = NULL;
  char            *tbnameCond     = NULL;
  SArray          *pTableIdList   = NULL;
  SSqlFuncMsg    **pExprMsg       = NULL;
  SSqlFuncMsg    **pSecExprMsg    = NULL;
  SExprInfo       *pExprs         = NULL;
  SExprInfo       *pSecExprs      = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  bool            isSTableQuery  = false;
  STableGroupInfo tableGroupInfo = {0};

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &pSecExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->numOfOutput, &pExprs, pExprMsg, pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pSecExprMsg != NULL) {
    code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &pSecExprs, pSecExprMsg, pTagColumnInfo);
    if (code != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  int64_t startUs = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *pId = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, pId->uid, pId->tid);
    code = tsdbGetOneTableGroup(tsdb, pId->uid, pQueryMsg->window.skey, &tableGroupInfo);
    if (code != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    } else {
      STableIdInfo *pId = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, pId->uid, pId->tid);
      code = tsdbQuerySTableByTagCond(tsdb, pId->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    }

    int64_t elapsedUs = taosGetTimestampUs() - startUs;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, elapsedUs);
  } else {
    assert(0);
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, pSecExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);

  // these pointers were handed to createQInfoImpl(); clear them so the cleanup below does not free them
  pExprs = NULL;
  pSecExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);
  free(pExprs);
  free(pSecExprs);

  free(pExprMsg);
  free(pSecExprMsg);

  taosArrayDestroy(pTableIdList);

  for (int32_t c = 0; c < pQueryMsg->numOfCols; c++) {
    SColumnInfo* pCol = &pQueryMsg->colList[c];
    freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
  }

  // the handle has already been freed by initQInfo() on failure, but *pQInfo may still hold the stale pointer
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
7533
/*
 * Log the query cost summary and free the handle. Handles rejected by
 * isValidQInfo() are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

7544 7545 7546 7547 7548 7549
/*
 * Mark the result as ready, decide whether the response has to be built right
 * away, give up ownership of the handle and post the `ready` semaphore.
 *
 * @return the value of needBuildResAfterQueryComplete(), evaluated under the handle lock
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool needBuild = needBuildResAfterQueryComplete(pQInfo);

  // clear qhandle owner, it must be in the secure area. other thread may run ahead before current, after it is
  // put into task to be executed.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  // used in retrieve blocking model.
  tsem_post(&pQInfo->ready);

  return needBuild;
}

7564
/*
 * Run one execution round of the query behind `qinfo`.
 *
 * The calling thread first claims the handle by swapping its id into `owner`;
 * if another thread already owns it, the handle code is set to
 * TSDB_CODE_QRY_IN_EXEC and false is returned. Every other exit goes through
 * doBuildResCheck(), whose result is returned.
 *
 * A non-zero value coming back from setjmp() is recorded as the error code of
 * the query.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t threadId = taosGetPthreadId();
  int64_t curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId);
  if (curOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) curOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  pQInfo->startExecTs = taosGetTimestampSec();

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t ret = setjmp(pQInfo->runtimeEnv.env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  // pick the executor: tag-only query, super table query, or plain table query
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

7622
/*
 * Check whether the result of the query is ready to be sent.
 *
 * In the blocking retrieve model the caller waits on the `ready` semaphore and
 * *buildRes is always true afterwards. Otherwise *buildRes reflects the
 * current `dataReady` state; when the data is not ready yet, pRspContext is
 * stored in the handle.
 *
 * @param qinfo        query handle
 * @param buildRes     out: true when the caller should build the response now
 * @param pRspContext  caller context stored in the handle
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the handle's current code
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  if (tsRetrieveBlockingModel) {
    // blocking model: park here until doBuildResCheck() posts the semaphore
    pQInfo->rspContext = pRspContext;
    tsem_wait(&pQInfo->ready);
    *buildRes = true;
    return pQInfo->code;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);

  assert(pQInfo->rspContext == NULL);
  if (pQInfo->dataReady != QUERY_RESULT_READY) {
    *buildRes = false;
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  } else {
    *buildRes = true;
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%s", pQInfo, pQuery->rowSize,
           pQuery->rec.rows, tstrerror(pQInfo->code));
  }

  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
7666

7667
/*
 * Allocate a retrieve response and fill it with the current result.
 *
 * The message length is sizeof(SRetrieveTableRsp) plus the result size from
 * getResultSize(), plus one int32_t and one STableIdInfo per entry of
 * arrTableIdInfo.
 *
 * @param qinfo         query handle
 * @param pRsp          out: response allocated with rpcMallocCont()
 * @param contLen       out: total length of the response
 * @param continueExec  out: false once the query is killed or over, true otherwise
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the allocation fails,
 *         otherwise the handle's current code
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosHashGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *pRetrieveRsp = *pRsp;
  pRetrieveRsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query; the elapsed time is always reported
  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    pRetrieveRsp->offset = 0;
  } else {
    pRetrieveRsp->offset = htobe64(pQuery->limit.offset);
  }
  pRetrieveRsp->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);

  pRetrieveRsp->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, pRetrieveRsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    // here current thread hold the refcount, so it is safe to free tsdbQueryHandle.
    *continueExec = false;
    pRetrieveRsp->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
7721

7722 7723 7724 7725 7726 7727 7728 7729
/*
 * Tell whether the query is finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise
 *         non-zero when the query is killed or its status contains QUERY_OVER,
 *         zero if neither holds
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (isQueryKilled(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQInfo->runtimeEnv.pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
7733
/*
 * Flag the query as killed and wait until no thread owns the handle any more.
 *
 * The wait polls `owner` every 100 ms; the owner is cleared in
 * doBuildResCheck() when the executing thread leaves qTableQuery().
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_SUCCESS otherwise
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread being stopped.
  // Once the query is stopped, the owner of qHandle will be cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766
/*
 * Write one tag value into the result buffer.
 *
 * A NULL `val` is stored as the NULL representation of `type`. Otherwise
 * BINARY/NCHAR values are copied with the length given by varDataTLen(val),
 * and all other types are copied as `bytes` bytes.
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }

    return;
  }

  if (isVarType) {
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7767 7768 7769
/*
 * Produce the result of a query that only touches tags / table names.
 *
 * Three shapes are handled, selected by the function id of the first output
 * expression:
 *   - TSDB_FUNC_TID_TAG: one var-data cell per table holding a zeroed int16,
 *     uid, tid, vgId and then the table name or tag value;
 *   - TSDB_FUNC_COUNT:   a single row holding the number of tables;
 *   - anything else:     one row per table with the requested tag values and/or
 *     table name, honouring limit and offset.
 *
 * pQInfo->tableIndex is advanced for every table consumed. On exit rec.rows
 * holds the number of rows produced and the status is QUERY_COMPLETED.
 * Nothing is done when there is no table group.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pTableList = GET_TABLEGROUP(pQInfo, 0);

  size_t numOfTables = taosArrayGetSize(pTableList);
  assert(numOfTables == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pExpr1[0].base.functionId;

  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pExpr1[0];
    int32_t    rsize = pExprInfo->bytes;

    // prefer the type/length recorded in the tag column list when the column is found there
    int16_t bytes = pExprInfo->bytes;
    int16_t type  = pExprInfo->type;

    for (int32_t t = 0; t < pQuery->numOfTags; ++t) {
      SColumnInfo* pTagCol = &pQuery->tagColList[t];
      if (pTagCol->colId == pExprInfo->base.colInfo.colId) {
        bytes = pTagCol->bytes;
        type = pTagCol->type;
        break;
      }
    }

    for (; pQInfo->tableIndex < numOfTables && count < pQuery->rec.capacity; ++count) {
      int32_t tableIdx = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pTableList, tableIdx);

      char *cursor = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(cursor, rsize - VARSTR_HEADER_SIZE);

      cursor = varDataVal(cursor);
      STableId* id = TSDB_TABLEID(item->pTable);

      *(int16_t *)cursor = 0;
      cursor += sizeof(int16_t);

      *(int64_t *)cursor = id->uid;  // memory align problem, todo serialize
      cursor += sizeof(id->uid);

      *(int32_t *)cursor = id->tid;
      cursor += sizeof(id->tid);

      *(int32_t *)cursor = pQInfo->vgId;
      cursor += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX) {
        char* tagVal = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(cursor, tagVal, type, bytes);
      } else {
        char* tbName = tsdbGetTableName(item->pTable);
        memcpy(cursor, tbName, varDataTLen(tbName));
      }
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = numOfTables;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // rows produced in this round are capped by the buffer capacity and, if smaller, by the limit
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while (pQInfo->tableIndex < numOfTables && count < maxNumOfTables) {
      int32_t tableIdx = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo*       pExprInfo = pQuery->pExpr1;
      STableQueryInfo* item = taosArrayGetP(pTableList, tableIdx);

      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        SExprInfo* pColExpr = &pExprInfo[j];

        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pColExpr->base.colInfo.flag)) {
          continue;
        }

        char   *data = NULL;
        char   *dst = NULL;
        int16_t type = 0;
        int16_t bytes = 0;

        if (pColExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pColExpr->type;
          bytes = pColExpr->bytes;

          data = tsdbGetTableTagVal(item->pTable, pColExpr->base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pColExpr->bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7900
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7901 7902 7903 7904
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7905
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7906 7907
}

H
Haojun Liao 已提交
7908
/*
 * Reserve query buffer quota for `numOfTables` tables.
 *
 * A negative tsQueryBufferSize means no accounting is done and the call always
 * succeeds. A value of zero disables query processing. Otherwise the estimated
 * size is subtracted from tsQueryBufferSize with a compare-and-swap loop.
 *
 * @return TSDB_CODE_SUCCESS when the quota was reserved (or accounting is off),
 *         TSDB_CODE_QRY_NOT_ENOUGH_BUFFER otherwise
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  if (tsQueryBufferSize <= 0) {
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  for (;;) {
    int64_t current = tsQueryBufferSize;
    int64_t remain = current - required;

    if (remain < 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry when another thread changed the quota between the read and the swap
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, current, remain) == current) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

H
Haojun Liao 已提交
7931
/*
 * Return the buffer quota estimated for `numOfTables` tables to
 * tsQueryBufferSize. Nothing is done while tsQueryBufferSize is zero or
 * negative.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize > 0) {
    int64_t released = getQuerySupportBufSize(numOfTables);

    // restore value is not enough buffer available
    atomic_add_fetch_64(&tsQueryBufferSize, released);
  }
}

7942 7943 7944 7945 7946 7947 7948
/* Return the response context currently stored in the handle (handle must not be NULL). */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pHandle = (SQInfo*) qinfo;
  assert(pHandle != NULL);

  void* pRspContext = pHandle->rspContext;
  return pRspContext;
}

7949 7950 7951 7952 7953 7954 7955
/*
 * Release callback handed to taosCacheInit() in qOpenQueryMgmt(). `qhandle`
 * points at a stored query handle; the query is killed first and then
 * destroyed. NULL slots and NULL handles are ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle != NULL && *ppHandle != NULL) {
    qKillQuery(*ppHandle);
    qDestroyQueryInfo(*ppHandle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7960
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7961 7962 7963 7964

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7965
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7966 7967 7968 7969
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7970

S
TD-1530  
Shengliang Guan 已提交
7971
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7972 7973 7974 7975
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7976 7977

  qDebug("vgId:%d, open querymgmt success", vgId);
7978
  return pQueryMgmt;
7979 7980
}

H
Haojun Liao 已提交
7981
/*
 * Callback passed to taosCacheRefresh() in qQueryMgmtNotifyClosed(): `handle`
 * points at a stored query handle, which is killed.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** ppHandle = (void**)handle;
  void*  pQInfo = *ppHandle;

  qKillQuery(pQInfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7987 7988 7989 7990 7991 7992 7993
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7994
//  pthread_mutex_lock(&pQueryMgmt->lock);
7995
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7996
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7997

H
Haojun Liao 已提交
7998
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015
}

/**
 * Tear down a query manager: clean up its handle cache, destroy its mutex
 * and free the manager itself.
 *
 * @param pQMgmt  query manager (SQueryMgmt*); NULL is ignored. The manager
 *                must already be flagged as closed (checked with assert).
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  // remember the id now; the manager is freed before the final log line
  const int32_t vgId = pMgmt->vgId;

  assert(pMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  tfree(pMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}

8021
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
8022
  if (pMgmt == NULL) {
8023
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
8024 8025 8026 8027 8028
    return NULL;
  }

  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
8029
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
8030
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
8031 8032 8033
    return NULL;
  }

H
Haojun Liao 已提交
8034
//  pthread_mutex_lock(&pQueryMgmt->lock);
8035
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
8036
//    pthread_mutex_unlock(&pQueryMgmt->lock);
8037
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
8038
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
8039 8040
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
8041
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
8042 8043
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE),
        (getMaximumIdleDurationSec()*1000));
H
Haojun Liao 已提交
8044
//    pthread_mutex_unlock(&pQueryMgmt->lock);
8045 8046 8047 8048 8049

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
8050
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
8051 8052
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
8053 8054 8055 8056 8057 8058 8059
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
8060 8061 8062
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
8063 8064
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
8065
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
8066
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
8067 8068 8069 8070 8071 8072
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
8073
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
8074 8075 8076 8077 8078
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
8079
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
8080 8081 8082
  return 0;
}

8083