qExecutor.c 233.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
/* upper bound of rows kept in one result-buffer page: 2^12 - 1 */
#define MAX_ROWS_PER_RESBUF_PAGE ((1u << 12) - 1)

/*
 * NOTE(review): the comment that used to sit here talked about checking whether the
 * primary column is loaded by default (and forcing an explicit load otherwise). None
 * of the macros below performs such a check, so that text looks stale -- confirm.
 */

/* true when status word p shares at least one bit with s */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)

/* classification of a column flag f */
#define TSDB_COL_IS_TAG(f)        (((f)&TSDB_COL_TAG) != 0)
#define TSDB_COL_IS_NORMAL_COL(f) ((f) == TSDB_COL_NORMAL)
#define TSDB_COL_IS_UD_COL(f)     ((f) == TSDB_COL_UDC)

/* true when the order of query q maps to the ascending forward step */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* test / assign the scanFlag member of a runtime environment */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* recover the enclosing SQInfo from a pointer to its runtimeEnv member */
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* position reached after moving `index` steps of size `step` from (query)->pos */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

/* flip n in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

/* compound literal used to initialize an SDataBlockInfo */
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
56 57 58 59 60
/*
 * Copy the skey/ekey members of _src into _dst. Both arguments are objects (not
 * pointers) exposing `skey` and `ekey`; they are parenthesized so that expressions
 * such as `*p` expand correctly.
 *
 * NOTE(review): the expansion ends with a ';' after `while (0)`. It is kept so that
 * any existing use without a trailing semicolon still compiles, but it means the
 * macro cannot be the sole statement of an un-braced `if` that has an `else`.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0);

61
/* Query status flags; each value occupies its own bit. */
enum {
  /* set when the query starts to execute */
  QUERY_NOT_COMPLETED = 1u << 0,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by queries and for
   * diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 1u << 1,

  /*
   * The query is over:
   * 1. used by queries that produce a single result row, e.g. count/sum/first/last/avg;
   * 2. also set once all data within the queried time window has been processed.
   */
  QUERY_COMPLETED = 1u << 2,

  /*
   * The results have not been completely returned to the client yet; usually used
   * for interval queries with the interpolation option.
   */
  QUERY_OVER = 1u << 3,
};
81 82

/* Comparison outcomes for a timestamp join (names only; usage is outside this view). */
enum {
  TS_JOIN_TS_EQUAL = 0,    /* value 0 */
  TS_JOIN_TS_NOT_EQUALS,   /* value 1 */
  TS_JOIN_TAG_NOT_EQUALS,  /* value 2 */
};

88
typedef struct {
89 90 91 92 93 94
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
95 96
} SQueryStatusInfo;

H
Haojun Liao 已提交
97
#if 0
/*
 * Disabled allocator wrappers that fail on purpose for some rand() values.
 * When enabled, the #defines at the bottom redirect malloc/calloc/realloc
 * in the remainder of this file to these wrappers.
 */

/* returns NULL when rand() is a multiple of 1000, otherwise forwards to malloc */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : malloc(__size);
}

/* returns NULL when rand() is a multiple of 1000, otherwise forwards to calloc */
static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : calloc(num, __size);
}

/* returns NULL when rand() % 5 is 0 or 1, otherwise forwards to realloc */
static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();
  return (v % 5 <= 1) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
130

131
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
132 133 134
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

135
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
136
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
137

H
Haojun Liao 已提交
138
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
139

H
Haojun Liao 已提交
140 141 142 143 144 145 146 147
// previous time window may not be of the same size of pQuery->intervalTime
#define GET_NEXT_TIMEWINDOW(_q, tw)                                   \
  do {                                                                \
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR((_q)->order.order); \
    (tw)->skey += ((_q)->slidingTime * factor);                       \
    (tw)->ekey = (tw)->skey + ((_q)->intervalTime - 1);               \
  } while (0)

148 149
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
150

H
hjxilinx 已提交
151
// todo move to utility
152
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
153

H
hjxilinx 已提交
154
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
155
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
156 157
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
158

159 160 161
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

162
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
163
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo);
164 165
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
166
static void buildTagQueryResult(SQInfo *pQInfo);
167

168
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
169
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
170

171
/**
 * Check whether the row at elemPos passes every column filter of the query.
 *
 * For each filtered column the value must be non-NULL and must satisfy at least one
 * of that column's filter elements.
 *
 * @param pQuery   query holding pFilterInfo / numOfFilterCols
 * @param elemPos  row position inside each filter column's data buffer
 * @return true when every filtered column qualifies, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = (char*)pColFilter->pData + pColFilter->info.bytes * elemPos;

    // a NULL value never qualifies
    if (isNull(pValue, pColFilter->info.type)) {
      return false;
    }

    // stop at the first filter element accepted by its callback
    int32_t idx = 0;
    while (idx < pColFilter->numOfFilters) {
      SColumnFilterElem *pCond = &pColFilter->pFilters[idx];
      if (pCond->fp(pCond, pValue, pValue)) {
        break;
      }
      ++idx;
    }

    // the scan ran to the end: no filter element matched this column
    if (idx >= pColFilter->numOfFilters) {
      return false;
    }
  }

  return true;
}

/**
 * Return the largest numOfRes among the output columns of the query.
 *
 * When the query has a main output (per hasMainOutput), the ts, tag and tagprj
 * functions are ignored: they cannot decide the number of output rows.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    skipAuxiliary = hasMainOutput(pQuery);
  int64_t maxOutput = 0;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool    auxiliary =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);

    if (!(skipAuxiliary && auxiliary)) {
      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
        maxOutput = pResInfo->numOfRes;
      }
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

225 226 227 228 229
/*
 * The number of results needs to be updated because the offset value was updated.
 *
 * Every output column except ts, tag, tagprj and ts_dummy gets its numOfRes replaced
 * by numOfRes; the previous value is asserted to be strictly larger.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    int16_t      functionId = pRuntimeEnv->pCtx[col].functionId;

    bool untouched = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG ||
                      functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TS_DUMMY);
    if (!untouched) {
      assert(pResInfo->numOfRes > numOfRes);
      pResInfo->numOfRes = numOfRes;
    }
  }
}

H
Haojun Liao 已提交
245
/* Map a group index to a result id: 20000000 plus 10000 per group index. */
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  return 20000000 + (groupIndex * 10000);
}

/**
 * Tell whether the group-by expression contains a column flagged TSDB_COL_NORMAL.
 *
 * @return false for a NULL expression or one without group columns
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (pCol->flag != TSDB_COL_NORMAL) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/**
 * Return the data type of the first TSDB_COL_NORMAL column of the group-by expression,
 * looked up by column id in pQuery->colList.
 *
 * @return the column type, or TSDB_DATA_TYPE_NULL when no such column is found
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 stays in place when the group-by list has no normal column
  int32_t groupColId = -2;
  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (pCol->flag == TSDB_COL_NORMAL) {
      groupColId = pCol->colId;
      break;
    }
  }

  for (int32_t idx = 0; idx < pQuery->numOfCols; ++idx) {
    if (groupColId == pQuery->colList[idx].colId) {
      return pQuery->colList[idx].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/**
 * Return true when the select list holds at least one tag_dummy/ts_dummy output and
 * at least one other function whose nStatus carries TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    sawDummy = false;
  int32_t selectivityCount = 0;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functId = pQuery->pSelectExpr[col].base.functionId;

    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      sawDummy = true;
    } else if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0 && sawDummy);
}

317 318 319 320 321 322 323 324 325 326 327
/* Return true when every output function is TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ. */
bool isProjQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t functId = pQuery->pSelectExpr[col].base.functionId;
    bool    projection = (functId == TSDB_FUNC_PRJ || functId == TSDB_FUNC_TAGPRJ);
    if (!projection) {
      return false;
    }
    ++col;
  }

  return true;
}

328
/* Return true when the first output function is TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
329

330 331 332
/**
 * Apply the LIMIT clause to the rows produced in the current round.
 *
 * When a positive limit would be exceeded by rec.total + rec.rows, rec.rows is cut
 * down to the remaining quota and the query is marked QUERY_COMPLETED.
 *
 * @return true when the rows were truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit given, or the limit is not exceeded yet
  if ((pQuery->limit.limit <= 0) || (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit)) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* Return true when any output function (ts outputs are skipped) is TOP or BOTTOM. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    if (functionId != TSDB_FUNC_TS &&
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
/**
 * Return true when the query needs tag values in its output: either it is a single
 * TSDB_FUNC_TS_COMP output (the ts_comp column requires the tag value for the join
 * filter), or at least one output expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // look for a tag column, by which the results are aggregated
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[idx].base.colInfo.flag)) {
      return true;
    }
    ++idx;
  }

  return false;
}

380 381 382 383 384 385 386 387
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
388
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
389
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
390 391
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
392 393
  } else {
    *pColStatis = NULL;
394
  }
395

H
Haojun Liao 已提交
396
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
397 398 399
    return false;
  }

400 401 402
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
403

404 405 406 407
  return true;
}

/**
 * Find the window result registered under the key pData, or register a new one.
 *
 * The key is looked up in pWindowResInfo->hashList. On a miss, a new slot is appended
 * only during the master scan; the result array is grown first when it is full
 * (x1.25 above 10000 slots, x1.5 otherwise, and always by at least one slot).
 *
 * @param pRuntimeEnv     runtime environment (provides the query, summary and jmp env)
 * @param pWindowResInfo  window result container to search / extend
 * @param pData           key bytes
 * @param bytes           key length
 * @param masterscan      whether new windows may be created
 * @return the window result at the resulting curIndex (via getWindowResult), or NULL
 *         when the key is unknown and masterscan is false
 *
 * Does not return on failure: longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY when growing fails, or with
 * TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when size exceeds MAX_INTERVAL_TIME_WINDOW.
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // a capacity of 0 or 1 is not enlarged by the factors above; without this the
      // slot handed out below would lie past the end of the array
      if (newCap <= pWindowResInfo->capacity) {
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCap * sizeof(SWindowResult)));
      if (t == NULL) {
        // the old array is still owned by pWindowResInfo->pResult
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // account for the new memory only once it has really been obtained
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCap - pWindowResInfo->capacity);

      pWindowResInfo->pResult = (SWindowResult *)t;

      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the time window that covers the timestamp ts.
 *
 * The starting candidate is the window at the current index of pWindowResInfo, or, when
 * no window is active yet (curIndex == -1), the one beginning at prevSKey. If ts is
 * outside the candidate, the window is shifted by whole multiples of slidingTime.
 * For ascending queries the returned ekey is capped at the end of the query range;
 * skey is never capped because it is used as the time window index value.
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    SWindowResult* pActive = getWindowResult(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
    win = GET_TIMEWINDOW(pWindowResInfo, pActive);
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  bool covered = (win.skey <= ts && win.ekey >= ts);
  if (!covered) {
    int64_t start = win.skey;

    // window starts after ts: slide it backwards
    if (start > ts) {
      start -= ((start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window ends before ts: slide it forwards
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      start += ((ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    win.skey = start;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  // query border check: only the ekey is bounded by the query time range
  if (win.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);
  return win;
}

/**
 * Assign a (page, row) position in the disk-based result buffer to a window result
 * that has none yet.
 *
 * The last page registered for sid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when sid has no page) a new page is requested.
 *
 * @return 0 on success or when a position was already assigned, -1 when no page
 *         could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  tFilePage *pPage = NULL;
  int32_t    pageId = -1;
  SIDList    pages = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(pages) != 0) {
    SPageInfo* pLast = getLastPageInfo(pages);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // take the next free row of the page for this window result
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = (int32_t)(pPage->num++);

    assert(pWindowRes->pos.pageId >= 0);
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
554
                                       STimeWindow *win, bool masterscan, bool* newWind) {
555 556
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
557

558 559
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
560
  if (pWindowRes == NULL) {
561 562 563
    *newWind = false;

    return masterscan? -1:0;
564
  }
565

566
  *newWind = true;
H
Haojun Liao 已提交
567

568 569 570
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
571
    if (ret != TSDB_CODE_SUCCESS) {
572 573 574
      return -1;
    }
  }
575

576
  // set time window for current result
577
  pWindowRes->skey = win->skey;
578

H
Haojun Liao 已提交
579
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
580 581 582
  return TSDB_CODE_SUCCESS;
}

583
/* Return the `closed` flag of the window result stored at `slot` (must be in [0, size)). */
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
  return pRes->closed;
}

H
Haojun Liao 已提交
588
/**
 * Count how many rows, starting at pos and moving in scan direction, belong to the
 * window ending at ekey.
 *
 * searchFn is applied to pData[pos..numOfRows-1] for ascending order and to
 * pData[0..pos] for descending order; a row whose timestamp equals ekey is included.
 * The result is asserted to be positive.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  const bool ascending = (order == TSDB_ORDER_ASC);
  int32_t    steps = 0;

  int32_t hit = ascending ? searchFn((char*) &pData[pos], numOfRows - pos, ekey, order)
                          : searchFn((char *)pData, pos + 1, ekey, order);

  if (hit >= 0) {
    if (ascending) {
      // hit is relative to pos
      steps = hit;
      if (pData[hit + pos] == ekey) {
        steps += 1;
      }
    } else {
      // hit is an absolute index in the block
      steps = pos - hit;
      if (pData[hit] == ekey) {
        steps += 1;
      }
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
619
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
620
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
621
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
622
    return pWindowResInfo->size;
623
  }
624

625
  // no qualified results exist, abort check
626
  int32_t numOfClosed = 0;
627

628
  if (pWindowResInfo->size == 0) {
629
    return pWindowResInfo->size;
630
  }
631

632
  // query completed
H
hjxilinx 已提交
633 634
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
635
    closeAllTimeWindow(pWindowResInfo);
636

637 638 639 640
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
641
    int64_t skey = TSKEY_INITIAL_VAL;
642

643 644
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
645
      if (pResult->closed) {
646
        numOfClosed += 1;
647 648
        continue;
      }
649

650 651 652
      TSKEY ekey = pResult->skey + pWindowResInfo->interval;
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
653 654
        closeTimeWindow(pWindowResInfo, i);
      } else {
655
        skey = pResult->skey;
656 657 658
        break;
      }
    }
659

660
    // all windows are closed, set the last one to be the skey
661
    if (skey == TSKEY_INITIAL_VAL) {
662 663 664 665 666
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
667

668
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].skey;
669

670 671
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
672
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
673
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
674

675
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
676
    } else {
677
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
678
             numOfClosed);
679 680
    }
  }
681

682 683 684 685 686
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
687

688
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
689
  return numOfClosed;
690 691 692
}

/**
 * Count the rows of the current data block, from startPos in scan direction, that fall
 * into the time window ending at ekey.
 *
 * When the window ends inside the block, the count comes from getForwardStepsInBlock();
 * otherwise all remaining rows of the block are taken. With updateLastKey set,
 * pQuery->current->lastKey is moved one step past the last row consumed (or past the
 * block border when the whole remainder is consumed).
 *
 * @return number of rows, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);
  int32_t num = -1;

  STableQueryInfo* pCurrent = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    bool endsInBlock = (ekey < pDataBlockInfo->window.ekey);
    if (endsInBlock) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    } else {
      num = pDataBlockInfo->rows - startPos;
    }

    if (updateLastKey) {  // update the last key
      TSKEY last = endsInBlock ? pPrimaryColumn[startPos + (num - 1)] : pDataBlockInfo->window.ekey;
      pCurrent->lastKey = last + step;
    }
  } else {  // desc
    bool endsInBlock = (ekey > pDataBlockInfo->window.skey);
    if (endsInBlock) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    } else {
      num = startPos + 1;
    }

    if (updateLastKey) {  // update the last key
      TSKEY last = endsInBlock ? pPrimaryColumn[startPos - (num - 1)] : pDataBlockInfo->window.skey;
      pCurrent->lastKey = last + step;
    }
  }

  assert(num > 0);
  return num;
}

732
/**
 * Run every output function of the query over a run of rows of the current block.
 *
 * Nothing is done unless this is the master scan or `closed` is true.
 *
 * @param pWin         time window being processed; its skey is stored in each context
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column, handed to functions flagged as selectivity
 * @param numOfTotal   compared with forwardStep to detect a partially used block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pFuncCtx = &pCtx[col];
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pFuncCtx->nStartQueryTimestamp = pWin->skey;
    pFuncCtx->size = forwardStep;

    // the start offset is always the lowest row position of the run
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pFuncCtx->startOffset = offset;
    } else {
      pFuncCtx->startOffset = offset - (forwardStep - 1);
    }

    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pFuncCtx->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pFuncCtx->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, functionId)) {
      aAggs[functionId].xFunction(pFuncCtx);
    }
  }
}

761
/**
 * Run every output function of the query on the single row at `offset`.
 *
 * Nothing is done unless this is the master scan or `closed` is true. The skey of pWin
 * is stored in each function context before the function is invoked.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pFuncCtx = &pCtx[col];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, functionId)) {
      aAggs[functionId].xFunctionF(pFuncCtx, offset);
    }
  }
}

H
Haojun Liao 已提交
777 778
/**
 * Advance *pNext to the next time window and locate its first row in the current block.
 *
 * @param pNext         [in/out] current window; moved one sliding step, then possibly
 *                      further when the located row lies beyond it
 * @param primaryKeys   timestamp column of the block
 * @param prevPosition  last row position of the previous window, or -1
 * @return the row position where the next window starts, or -1 when the next window
 *         does not intersect the current block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  bool outsideBlock = ascending ? (pNext->skey > pDataBlockInfo->window.ekey)
                                : (pNext->ekey < pDataBlockInfo->window.skey);
  if (outsideBlock) {
    return -1;
  }

  // key to search for, bounded by the start of the query range
  TSKEY startKey = -1;
  if (ascending) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  int32_t startPos = 0;
  if (pQuery->slidingTime == pQuery->intervalTime && prevPosition != -1) {
    // tumbling time window query, a special case of sliding time window query:
    // the next window begins right after the previous position
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    startPos = prevPosition + step;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (ascending) {
    if (primaryKeys[startPos] > pNext->ekey) {
      TSKEY rowKey = primaryKeys[startPos];

      pNext->ekey += ((rowKey - pNext->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
      pNext->skey = pNext->ekey - pQuery->intervalTime + 1;
    }
  } else {
    if (primaryKeys[startPos] < pNext->skey) {
      TSKEY rowKey = primaryKeys[startPos];

      pNext->skey -= ((pNext->skey - rowKey + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
      pNext->ekey = pNext->skey + pQuery->intervalTime - 1;
    }
  }

  return startPos;
}

H
Haojun Liao 已提交
830
/*
 * Return the key at which the scan of a time window has to stop, limited by the
 * end key of the query range.
 *
 * Ascending query : the window end key, but never larger than pQuery->window.ekey.
 * Descending query: the window start key, but never smaller than pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
847 848
// todo binary search
/*
 * Find the column whose id equals colId in the list of loaded columns and return
 * its data buffer. Returns NULL when no column in pDataBlock carries that id.
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
862
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
863 864 865
  if (pDataBlock == NULL) {
    return NULL;
  }
866

H
Haojun Liao 已提交
867
  char *dataBlock = NULL;
H
Haojun Liao 已提交
868
  SQuery *pQuery = pRuntimeEnv->pQuery;
869
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
870

871
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
872
  if (functionId == TSDB_FUNC_ARITHM) {
873
    sas->pArithExpr = &pQuery->pSelectExpr[col];
874

875 876 877 878 879 880
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
881

882 883 884 885
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
886

H
Haojun Liao 已提交
887
    if (sas->data == NULL) {
H
Haojun Liao 已提交
888
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
889 890 891
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

892
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
893
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
894
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
895
      SColumnInfo *pColMsg = &pQuery->colList[i];
896

897 898 899 900 901 902 903 904
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
905

906
      assert(dataBlock != NULL);
907
      sas->data[i] = dataBlock;  // start from the offset
908
    }
909

910
  } else {  // other type of query function
911
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
912
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
913 914 915 916 917
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
918 919
    } else {
      dataBlock = NULL;
920 921
    }
  }
922

923 924 925 926
  return dataBlock;
}

/**
H
Haojun Liao 已提交
927
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
928 929
 * @param pRuntimeEnv
 * @param forwardStep
930
 * @param tsCols
931 932 933 934 935
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
936
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
937 938
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
939
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
940 941
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

942 943
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
944
  if (pDataBlock != NULL) {
945
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
946
    tsCols = (TSKEY *)(pColInfo->pData);
947
  }
948

949
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
950
  if (sasArray == NULL) {
H
Haojun Liao 已提交
951
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
952 953
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
954

955
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
956
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
957
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
958
  }
959

960
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
961 962
  if (QUERY_IS_INTERVAL_QUERY(pQuery)/* && tsCols != NULL*/) {
    TSKEY ts = TSKEY_INITIAL_VAL;
963

H
Haojun Liao 已提交
964 965 966 967 968 969 970 971
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
972
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
973 974
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
975
      taosTFree(sasArray);
H
hjxilinx 已提交
976
      return;
977
    }
978

H
Haojun Liao 已提交
979 980 981
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

982
    if (hasTimeWindow) {
H
Haojun Liao 已提交
983
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
984
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
985

986
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
987
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
988
    }
989

990 991
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
992

993
    while (1) {
H
Haojun Liao 已提交
994 995
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
996 997 998
      if (startPos < 0) {
        break;
      }
999

1000
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1001
      hasTimeWindow = false;
H
Haojun Liao 已提交
1002 1003
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1004 1005
        break;
      }
1006

1007 1008 1009 1010 1011
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1012
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1013

1014 1015
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1016
    }
1017

1018 1019 1020 1021 1022 1023 1024
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1025
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1026
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1027 1028 1029 1030 1031
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1032

1033 1034 1035 1036
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1037

S
Shengliang Guan 已提交
1038
    taosTFree(sasArray[i].data);
1039
  }
1040

S
Shengliang Guan 已提交
1041
  taosTFree(sasArray);
1042 1043 1044 1045 1046 1047
}

/*
 * Select the output buffer of the group that the given group-by column value belongs to.
 *
 * Returns -1 if the value is NULL, if no window result could be obtained for the value, or
 * if a new result buffer could not be added; TSDB_CODE_SUCCESS otherwise.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // ignore the null value
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t groupResultId = 1;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // numeric form of the group key; stays -1 for the types that are not listed here
  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->skey = groupKey;
  assert(pRuntimeEnv->windowResInfo.interval == 0);

  // not assign result buffer yet, add new result buffer
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pResultBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1083
/*
 * Return the data of the group-by column inside the loaded data block and report the type
 * and byte width of that column through *type and *bytes.
 *
 * Tag columns in the group-by list are skipped. Returns NULL when no non-tag group-by
 * column has its data in pDataBlock.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pColIndex->flag == TSDB_COL_TAG) {
      continue;
    }

    // position of the group-by column in the column list of the query
    int32_t targetId = pColIndex->colId;
    int16_t slot = -1;
    for (int32_t c = 0; c < pQuery->numOfCols && slot < 0; ++c) {
      if (pQuery->colList[c].colId == targetId) {
        slot = (int16_t)c;
      }
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * the column index is acquired from the first tables of all qualified tables in this vnode during
     * query prepare stage, the remain tables may not have the required column in cache actually. So the
     * loaded columns are searched by column id instead of relying on that index.
     */
    int32_t numOfLoaded = (int32_t)taosArrayGetSize(pDataBlock);
    for (int32_t c = 0; c < numOfLoaded; ++c) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, c);
      if (pColData->info.colId == pColIndex->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at the given offset with the current element of the timestamp buffer.
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS if the tag of the first function context differs from the
 * tag of the element, TS_JOIN_TS_NOT_EQUALS if the row has not reached the timestamp of the
 * element yet (with respect to the query order), and TS_JOIN_TS_EQUAL if both timestamps
 * are the same. A row that is already beyond the element is not expected and asserts.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, &elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // the row is still "before" the element in scan direction
  bool notReached = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (notReached) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the given function has to be invoked for the current scan.
 *
 * - TSDB_FUNC_TS always runs.
 * - A function whose result is already complete, and the TAG_DUMMY/TS_DUMMY functions, never run.
 * - first/first_dst run only in an ascending query.
 * - last/last_dst run only if the order stored in the first parameter equals the query order.
 * - Everything else runs unless the current scan is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (GET_RES_INFO(pCtx)->complete || isDummy) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST) || (functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST) || (functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, none of the remaining functions is executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1190 1191
/*
 * Apply the output functions of the query to the current data block one row at a time.
 *
 * For every row, in scan order:
 *   1. if a timestamp buffer is attached, the row is matched against its current element;
 *      a tag mismatch stops the scan of the block, a timestamp mismatch skips the row;
 *   2. rows rejected by the column filters are skipped;
 *   3. interval query: the row is applied to the time window covering its timestamp and to
 *      every following window that still covers it;
 *      other queries: the group output buffer is selected first (group by normal column),
 *      then every function accepted by functionNeedToExecute() is invoked on the row;
 *   4. the timestamp buffer is advanced; the query is set to completed when it is exhausted.
 *
 * Afterwards the last key of the current table is set to one step behind the last visited row.
 *
 * If the SArithmeticSupport array cannot be allocated, the query result is finalized and the
 * function leaves through longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY).
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pTableInfo = pQuery->current;

  bool isMasterScan = IS_MASTER_SCAN(pRuntimeEnv);

  // the timestamps are only available if the first loaded column is the timestamp column
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  bool groupByColumn = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv);  // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t groupColType = 0;
  int16_t groupColBytes = 0;
  char   *groupColData = NULL;
  if (groupByColumn) {
    groupColData = getGroupbyColumnData(pQuery, &groupColType, &groupColBytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data of the filters
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[k];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t joinRes = doTSJoinFilter(pRuntimeEnv, offset);
      if (joinRes == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (joinRes == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(joinRes == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, isMasterScan,
                                            &hasTimeWindow);
      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // visit the following windows that cover this row too, then restore the current window
      STimeWindow nextWin = win;
      int32_t     savedIndex = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool asc = QUERY_IS_ASC_QUERY(pQuery);
        if ((asc && nextWin.skey > pQuery->window.ekey) || (!asc && nextWin.skey < pQuery->window.ekey)) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, isMasterScan,
                                    &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByColumn) {
        char *groupVal = groupColData + groupColBytes * offset;

        // null data, too many state code
        if (setGroupResultOutputBuf(pRuntimeEnv, groupVal, groupColType, groupColBytes) != TSDB_CODE_SUCCESS) {
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else if (QUERY_IS_ASC_QUERY(pQuery)) {
    pTableInfo->lastKey = pDataBlockInfo->window.ekey + step;
  } else {
    pTableInfo->lastKey = pDataBlockInfo->window.skey + step;
  }

  // todo refactor: extract method
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      taosTFree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1353
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1354
  SQuery *pQuery = pRuntimeEnv->pQuery;
1355

H
hjxilinx 已提交
1356 1357
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1358

H
Haojun Liao 已提交
1359
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1360
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1361
  } else {
1362
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1363
  }
1364

1365
  // update the lastkey of current table
1366
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1367
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1368

1369
  // interval query with limit applied
1370
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1371
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1372 1373
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1374
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1375

1376 1377 1378 1379
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1380

1381 1382 1383
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1384

1385 1386 1387
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1388 1389 1390 1391 1392

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1393
    }
1394
  }
1395

1396
  return numOfRes;
1397 1398
}

H
Haojun Liao 已提交
1399
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1400
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
1401

1402 1403
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1404

1405
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1406
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1407
  pCtx->aInputElemBuf = inputData;
1408

1409
  if (tpField != NULL) {
H
Haojun Liao 已提交
1410
    pCtx->preAggVals.isSet  = true;
1411 1412
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1413 1414 1415
  } else {
    pCtx->preAggVals.isSet = false;
  }
1416

H
Haojun Liao 已提交
1417 1418
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1419 1420 1421
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1422

1423 1424
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1425
    pCtx->ptsList = tsCol;
1426
  }
1427

1428 1429 1430 1431 1432
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1433
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1434
    /*
H
Haojun Liao 已提交
1435
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1436 1437 1438 1439 1440 1441 1442 1443 1444 1445
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1446

1447 1448
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1449 1450 1451 1452 1453 1454
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1455 1456
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1457
    pInterpInfo->type = (int8_t)pQuery->fillType;
1458 1459
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1460

1461 1462 1463 1464 1465 1466 1467
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1468
  }
1469

1470 1471 1472 1473 1474 1475
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1476
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1477 1478 1479
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1480
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1481 1482 1483 1484 1485 1486
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1487
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1488 1489
  SQuery* pQuery = pRuntimeEnv->pQuery;

1490
  if (isSelectivityWithTagsQuery(pQuery)) {
1491
    int32_t num = 0;
1492
    int16_t tagLen = 0;
1493

1494
    SQLFunctionCtx *p = NULL;
1495
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1496 1497 1498
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1499

1500
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1501
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1502

1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1516 1517 1518 1519 1520
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1521
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1522
    }
1523
  }
H
Haojun Liao 已提交
1524 1525

  return TSDB_CODE_SUCCESS;
1526 1527
}

H
Haojun Liao 已提交
1528 1529
/*
 * Hand each output column its slice of the shared intermediate buffer: the slices are laid
 * out back to back in buf, the i-th one being pQuery->pSelectExpr[i].interBytes bytes wide.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char *cursor = buf;

  int32_t i = 0;
  while (i < pQuery->numOfOutput) {
    int32_t interBytes = pQuery->pSelectExpr[i].interBytes;

    setResultInfoBuf(&pResultInfo[i], interBytes, isStableQuery, cursor);
    cursor += interBytes;

    ++i;
  }
}

1538
/*
 * Allocate and initialize the runtime environment of a query: the result info
 * array (followed in the same allocation by the intermediate result buffer)
 * and one SQLFunctionCtx per output column.
 *
 * @param pRuntimeEnv runtime environment to set up; pRuntimeEnv->pQuery must be valid
 * @param order       value stored in param[2] of top/bottom/diff function contexts
 * @return TSDB_CODE_SUCCESS on success, TSDB_CODE_QRY_OUT_OF_MEMORY on failure.
 *         On failure the result info, the context array and every variant
 *         parameter created so far are released.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // one allocation: SResultInfo[numOfOutput] followed by interBufSize bytes of intermediate buffer
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // resolve the input type/length from the tag schema, the constant argument or the column schema
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    // copy the function arguments into variants owned by the context
    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions require the first output column to be the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offset of output column i is the running sum of the preceding output widths
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffer starts right after the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the variant parameters created above before dropping the context
  // array; otherwise they are unreachable once pCtx is freed. The array comes
  // from calloc, so untouched entries have numOfParams == 0.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by the runtime environment: time window info,
 * function contexts (parameters, tag variant, tag context list), result info,
 * fill info, result buffer, both query handles and the ts buffer.
 * Does nothing when no query is attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);

  qDebug("QInfo:%p teardown runtime env", pQInfo);
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = pRuntimeEnv->pCtx + col;

      int32_t paramIdx = 0;
      while (paramIdx < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[paramIdx]);
        paramIdx += 1;
      }

      tVariantDestroy(&pFuncCtx->tag);
      taosTFree(pFuncCtx->tagInfo.pTagCtxList);
    }

    // result info is only released together with the context array
    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1677
// true if the code field of _q holds TSDB_CODE_TSC_QUERY_CANCELLED
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1678

H
Haojun Liao 已提交
1679
// Flag the query as cancelled; IS_QUERY_KILLED() tests for exactly this code.
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1680

H
Haojun Liao 已提交
1681 1682 1683
/*
 * Decide whether the query is a fixed-output query. Interval queries never
 * are; top/bottom and group-by-normal-column queries always are. Otherwise
 * the query qualifies as soon as one function (ignoring timestamp helper
 * functions) is not flagged as multi-output.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[i].base;
    int32_t      fid = pFunc->functionId;

    // the ts_comp function: a leading projection of the primary timestamp column with one parameter
    bool isTsComp = (i == 0) && (fid == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                    (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool isTsFunc = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[fid].nStatus)) {
      return true;
    }
  }

  return false;
}

1713
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1714
// Returns true if any output expression uses the TSDB_FUNC_INTERP function.
static bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }

    idx += 1;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1726
// Returns true if any output expression is one of sum_rate/sum_irate/avg_rate/avg_irate.
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    // timestamp column is irrelevant for this check
    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    bool isSumRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE);
    bool isAvgRate = (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);
    if (isSumRate || isAvgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1742
// Returns true if any output expression uses the TSDB_FUNC_LAST_ROW function.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    idx += 1;
  }

  return false;
}

H
hjxilinx 已提交
1753
/*
 * Decide whether a second scan in the opposite direction is required.
 * A first/first_dst function needs it when the query is not ascending; for
 * the first last/last_dst function met, the answer is whether the order
 * stored in its first argument differs from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      fid = pFunc->functionId;

    bool isHelper = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (isHelper) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t funcOrder = (int32_t)pFunc->arg[0].argValue.i64;
      return funcOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1772

H
Haojun Liao 已提交
1773 1774 1775 1776
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1777 1778
// Returns true only if every output expression is one of the four tag-query kinds:
// tagprj, tid_tag, count(tbname) or a projection of a user-defined constant column.
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];
    int32_t    fid = pExpr->base.functionId;

    bool isTagQuery =
        (fid == TSDB_FUNC_TAGPRJ) ||
        (fid == TSDB_FUNC_TID_TAG) ||
        (fid == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (fid == TSDB_FUNC_PRJ && pExpr->base.colInfo.flag == TSDB_COL_UDC);

    if (!isTagQuery) {
      return false;
    }
  }

  return true;
}

1794 1795
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1796
/*
 * Compute the interval-aligned time window that contains `key`.
 *
 * win->skey is the aligned start returned by taosGetIntervalStartTimestamp().
 * win->ekey is skey + intervalTime - 1, except when keyFirst is so close to
 * INT64_MAX that adding one interval could overflow; then the end is clamped
 * to INT64_MAX.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);
  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit, pQuery->precision);

  /*
   * If keyFirst > INT64_MAX - intervalTime, the range between keyFirst and keyLast must be
   * shorter than one interval, so there is no need to adjust the query range.
   */
  if (keyFirst > (INT64_MAX - pQuery->intervalTime)) {
    assert(keyLast - keyFirst < pQuery->intervalTime);
    win->ekey = INT64_MAX;
    return;
  }

  win->ekey = win->skey + pQuery->intervalTime - 1;
}

/*
 * Set pQuery->checkBuffer. It is 0 for top/bottom and group-by-normal-column
 * queries. Otherwise it is 1 only when at least one non-timestamp function
 * exists and every such function is flagged as multi-output.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!allMultiOutput) {
      break;  // a single non multi-output function settles the answer
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1839
// Returns false (and logs an error) if two adjacent entries of colList share the same column id.
bool colIdCheck(SQuery *pQuery) {
  // load data column information is incorrect
  for (int32_t next = 1; next < pQuery->numOfCols; ++next) {
    int32_t prev = next - 1;

    if (pQuery->colList[prev].colId == pQuery->colList[next].colId) {
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
      return false;
    }
  }

  return true;
}

// todo: also ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
//
// Returns true if every output function, ignoring ts/ts_dummy/tag/tag_dummy,
// is either functId or functIdDst.
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool ignorable = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                     (fid == TSDB_FUNC_TAG_DUMMY);

    if (!ignorable && fid != functId && fid != functIdDst) {
      return false;
    }
  }

  return true;
}

// True if the query consists only of first/first_dst functions (plus ts/tag helper columns).
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// True if the query consists only of last/last_dst functions (plus ts/tag helper columns).
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1874 1875 1876 1877 1878 1879
// todo refactor, add iterator
//
// For every table of every table group: swap the start/end key of the table's
// query window and store the new start key as lastKey of the matching
// STableKeyInfo entry in tableGroupInfo.
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pQueryInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray* pKeyInfoList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pQueryInfoList);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo* pTableQueryInfo = (STableQueryInfo*) taosArrayGetP(pQueryInfoList, t);
      SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);

      // the scan of this table restarts from the new start key
      STableKeyInfo* pKeyInfo = taosArrayGet(pKeyInfoList, t);
      pKeyInfo->lastKey = pTableQueryInfo->win.skey;
    }
  }
}

H
Haojun Liao 已提交
1892 1893 1894
/*
 * Force the scan order of the query to `newOrder`.
 *
 * If the current order differs, the change is logged, the query time window
 * (skey/ekey) is swapped and, when `exchangeTableWin` is true, the window of
 * every table is swapped as well. The order field is assigned in all cases.
 *
 * @param queryType short tag of the query kind, used only in the log message
 */
static void switchScanOrderAndRange(SQInfo *pQInfo, const char *queryType, int32_t newOrder, bool exchangeTableWin) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool isAsc = QUERY_IS_ASC_QUERY(pQuery);
  bool orderChanged = (newOrder == TSDB_ORDER_ASC) ? !isAsc : isAsc;

  if (orderChanged) {
    qDebug("QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
           "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64,
           GET_QINFO_ADDR(pQuery), queryType, pQuery->order.order, newOrder, pQuery->window.skey,
           pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    if (exchangeTableWin) {
      doExchangeTimeWindow(pQInfo);
    }
  }

  pQuery->order.order = newOrder;
}

/*
 * Adjust the scan order (and the query range accordingly) for query types
 * that need a specific scan direction:
 *  - last_row queries and point interpolation queries without interval: ascending;
 *  - queries without interval made only of first (resp. last) functions:
 *    ascending (resp. descending), table windows are exchanged too;
 *  - interval queries on a super table made only of first (resp. last)
 *    functions: ascending (resp. descending), table windows are left untouched.
 * Every other query keeps its order.
 *
 * @param pQInfo      query info whose runtimeEnv.pQuery is adjusted in place
 * @param stableQuery true if this is a super table query
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // todo handle the case where order-irrelevant query types are mixed with order-critical ones
  // last_row query: always ascending; only normalize the range so that skey <= ekey
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery),
           pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    switchScanOrderAndRange(pQInfo, "interp", TSDB_ORDER_ASC, false);
    return;
  }

  bool intervalQuery = (pQuery->intervalTime != 0);
  if (intervalQuery && !stableQuery) {
    return;  // interval query on a single table: order is kept
  }

  // only the non-interval case exchanges the per-table windows
  bool        exchangeTableWin = !intervalQuery;
  const char *firstTag = intervalQuery ? "only-first stable" : "only-first";
  const char *lastTag = intervalQuery ? "only-last stable" : "only-last";

  if (onlyFirstQuery(pQuery)) {
    switchScanOrderAndRange(pQInfo, firstTag, TSDB_ORDER_ASC, exchangeTableWin);
  } else if (onlyLastQuery(pQuery)) {
    switchScanOrderAndRange(pQInfo, lastTag, TSDB_ORDER_DESC, exchangeTableWin);
  }
}

/*
 * Initial number of result buffer pages: 128 for group-by-normal-column
 * queries, max(number of tables, 16) for interval queries, 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t pages = 1;  // default: for super table query, one page for each subset

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(pages > 0);
  return pages;
}

1990 1991 1992
/*
 * Compute the row size and the page size of the intermediate result buffer.
 *
 * @param ps      out: page size; starts at DEFAULT_INTERN_BUF_PAGE_SIZE and is
 *                doubled until a page (minus the tFilePage header) holds two rows
 * @param rowsize out: pQuery->rowSize scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT()
 *
 * Also stores the resulting number of rows per page in pRuntimeEnv->numOfRowsPerPage.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));
  int32_t overhead = sizeof(tFilePage);

  // one page contains at least two rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * 2) > (*ps) - overhead; *ps = (*ps << 1u)) {
    // keep doubling the page size
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2006
// true for every data type except binary and nchar
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2007

H
Haojun Liao 已提交
2008 2009 2010 2011
/*
 * Use the per-column block statistics to decide whether the data block has to
 * be loaded.
 *
 * Returns true when no decision can be made from the statistics (no
 * statistics at all, nothing to filter on, no statistics for a filtered
 * column, binary/nchar filter column) or when a filter accepts the [min, max]
 * range of its column. For top/bottom queries that pass the filter stage the
 * answer of topbot_datablock_filter() for the first top/bottom function is
 * returned. Otherwise false.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery)) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    // locate the statistics entry of the filtered column
    int32_t statIdx = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pFilterInfo->info.colId) {
        statIdx = c;
        break;
      }
    }

    // no statistics data
    if (statIdx == -1) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
      return true;
    }

    SDataStatis* pColStat = &pDataStatis[statIdx];

    // all points in current column are NULL, no need to check its boundary value
    if (pColStat->numOfNull == numOfRows) {
      continue;
    }

    char *pMin = (char *)&pColStat->min;
    char *pMax = (char *)&pColStat->max;

    // float columns: the stored min/max are read as double and narrowed to float before filtering
    float fMin = 0;
    float fMax = 0;
    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
      fMin = (float)(*(double *)(&pColStat->min));
      fMax = (float)(*(double *)(&pColStat->max));

      pMin = (char *)&fMin;
      pMax = (char *)&fMax;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      if (pFilterInfo->pFilters[f].fp(&pFilterInfo->pFilters[f], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t fid = pQuery->pSelectExpr[i].base.functionId;
      if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
        // NOTE(review): pDataStatis is indexed by the output column index here - confirm this matches the statistics layout
        return topbot_datablock_filter(&pCtx[i], fid, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119
// true if point _p lies strictly inside window _w (both boundaries excluded)
#define PT_IN_WINDOW(_p, _w)  ((_p) > (_w).skey && (_p) < (_w).ekey)

/*
 * Returns true if a boundary of some interval time window falls strictly
 * inside the time range of the data block. The windows are visited in scan
 * direction, starting with the aligned window that contains the block's first
 * key (ascending) or last key (descending).
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow win = {0};

  TSKEY rangeStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY rangeEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, rangeStart, rangeEnd, &win);

    if (PT_IN_WINDOW(win.ekey, pBlockInfo->window)) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &win);
      if (win.skey > pBlockInfo->window.skey) {
        break;
      }

      if (PT_IN_WINDOW(win.skey, pBlockInfo->window) || PT_IN_WINDOW(win.ekey, pBlockInfo->window)) {
        return true;
      }
    }

    return false;
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, rangeStart, rangeEnd, &win);
  if (PT_IN_WINDOW(win.skey, pBlockInfo->window)) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &win);
    if (win.ekey < pBlockInfo->window.skey) {
      break;
    }

    if (PT_IN_WINDOW(win.skey, pBlockInfo->window) || PT_IN_WINDOW(win.ekey, pBlockInfo->window)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
2120
/*
 * Load only what the query needs from the current data block: nothing, the
 * block statistics, or the block data.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pQueryHandle tsdb query handle positioned on the current block
 * @param pBlockInfo   description (time range, rows) of the current block
 * @param pStatis      out: block statistics, when they were retrieved
 * @param pDataBlock   out: block data, when it was loaded
 * @return BLK_DATA_DISCARD if the block was filtered out using its statistics,
 *         TSDB_CODE_SUCCESS otherwise.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t status = 0;
  // pTSBuf is a pointer: test it against NULL rather than with an ordered comparison against 0
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      status = BLK_DATA_ALL_NEEDED;
    }

    if (status != BLK_DATA_ALL_NEEDED) {
      // accumulate the requirement of every function; stop once the whole block is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;

        status |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if ((status & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if (status == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (status == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // TODO(review): the failure is currently ignored; decide whether it should be reported to the caller
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(status == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // TODO(review): the failure is currently ignored; decide whether it should be reported to the caller
    }

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      return BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2188
/*
 * Binary search in an ascending array of timestamps.
 *
 * @param pValue array of `num` TSKEY values
 * @param num    number of entries; -1 is returned when num <= 0
 * @param key    timestamp to look for
 * @param order  TSDB_ORDER_ASC:  index of the first entry >= key, -1 if there is none;
 *               TSDB_ORDER_DESC: index of the last entry <= key, -1 if there is none
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *tsList = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the last position whose value is not greater than the key
    for (;;) {
      if (key >= tsList[hi]) return hi;
      if (key == tsList[lo]) return lo;
      if (key < tsList[lo]) return lo - 1;

      mid = lo + ((hi - lo + 1) >> 1);

      if (key < tsList[mid]) {
        hi = mid - 1;
      } else if (key > tsList[mid]) {
        lo = mid + 1;
      } else {
        break;  // exact match
      }
    }
  } else {
    // find the first position whose value is not less than the key
    for (;;) {
      if (key <= tsList[lo]) return lo;
      if (key == tsList[hi]) return hi;

      if (key > tsList[hi]) {
        // the candidate is the entry right after the current range, if there is one
        hi = hi + 1;
        return (hi >= num) ? -1 : hi;
      }

      mid = lo + ((hi - lo + 1) >> 1);

      if (key < tsList[mid]) {
        hi = mid - 1;
      } else if (key > tsList[mid]) {
        lo = mid + 1;
      } else {
        break;  // exact match
      }
    }
  }

  return mid;
}

2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263
/*
 * Grow every output column buffer so that it can hold `capacity` rows.
 * Nothing is done when `capacity` is smaller than the current capacity.
 * On allocation failure the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity may not fit into int32_t
    size_t newBytes = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newBytes);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2279 2280 2281
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to
 * accommodate the results of the current block.
 *
 * Only applies when the query is not an interval query, not a group-by-normal-
 * column query, not a fixed-output query and not a ts-comp query. If fewer
 * free rows remain than the block contains, each column buffer is grown by the
 * missing number of rows, the rows after the current result count are zeroed
 * and the output pointers of the function contexts are refreshed.
 * On allocation failure the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol && !isFixedOutputQuery(pRuntimeEnv) && !isTSCompQuery(pQuery)) {
    SResultRec *pRec = &pQuery->rec;

    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
      int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
      int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        int32_t bytes = pQuery->pSelectExpr[i].bytes;
        assert(bytes > 0 && newSize > 0);

        // compute the size in size_t: bytes * newSize may not fit into int32_t
        size_t newBytes = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

        char *tmp = realloc(pQuery->sdata[i], newBytes);
        if (tmp == NULL) {  // todo handle the oom
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        } else {
          // clear everything behind the rows that already hold results
          memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
          pQuery->sdata[i] = (tFilePage *)tmp;
        }

        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

        // top/bottom/diff write their timestamps into the buffer of the first output column
        int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }

      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
             newSize, pRec->capacity, newSize - pRec->rows);

      pRec->capacity = newSize;
    }
  }
}

2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338
/*
 * For an interval query whose window bookkeeping has not been initialized yet
 * (prevSKey still equals TSKEY_INITIAL_VAL), align the first time window to
 * the given data block and record it in the runtime window-result info.
 * Does nothing for non-interval queries or once prevSKey has been set.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo* pInfo  = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow aligned = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &aligned);
    pInfo->startTime = aligned.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &aligned);
    pInfo->startTime = pQuery->window.skey;
  }

  // both scan directions remember the aligned start key as the previous window start
  pInfo->prevSKey = aligned.skey;
}

2339 2340
/*
 * Iterate over every data block delivered by the query handle of the current
 * scan (primary handle for the master scan, secondary handle otherwise) and
 * apply the query functions to each block.
 *
 * The loop stops early when the query status reports QUERY_RESBUF_FULL or
 * QUERY_COMPLETED. Errors are reported by longjmp-ing to pRuntimeEnv->env:
 *   - TSDB_CODE_TSC_QUERY_CANCELLED when the query has been killed,
 *   - terrno when it is set after loading a block or after the loop.
 *
 * Always returns 0 when it returns normally.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  SQueryCostInfo*  summary  = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray *pDataBlock   = NULL;
    if (loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis, &pDataBlock) == BLK_DATA_DISCARD) {
      // the block is skipped entirely: move lastKey one step past it in the scan direction
      pQuery->current->lastKey = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey + step:blockInfo.window.skey + step;
      continue;
    }

    if (terrno != TSDB_CODE_SUCCESS) { // load data block failed, abort query
      longjmp(pRuntimeEnv->env, terrno);  // does not return
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : blockInfo.rows - 1;
    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    summary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    } else {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    }
  }

  return 0;
}

/*
 * Set the tag value in SQLFunctionCtx,
 * e.g., load the tag information into the input buffer.
 */
2416
/*
 * Replace the content of `tag` with the value of one tag column of `pTable`.
 *
 * The previous content of `tag` is destroyed first. For the table-name pseudo
 * column (TSDB_TBNAME_COLUMN_INDEX) the table name is stored as binary. For a
 * regular tag column the value is fetched with tsdbGetTableTagVal(); a missing
 * or null value marks the variant as TSDB_DATA_TYPE_NULL. The `tsdb` argument
 * is not used by this function.
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char* name = tsdbGetTableName(pTable);
    assert(name != NULL);

    tVariantCreateFromBinary(tag, varDataVal(name), varDataLen(name), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);

  // no stored value, or the stored value is the null marker of this type
  if (val == NULL || isNull(val, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  bool isVarLen = (type == TSDB_DATA_TYPE_BINARY) || (type == TSDB_DATA_TYPE_NCHAR);
  if (isVarLen) {
    // variable-length data carries its own length header
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
  } else {
    tVariantCreateFromBinary(tag, val, bytes, type);
  }
}

2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460
/*
 * Linear search of the tag column list for the entry whose colId equals
 * `colId`. Returns a pointer to the first match, or NULL when none matches.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo* pFound = NULL;

  int32_t idx = 0;
  while (idx < numOfTags && pFound == NULL) {
    if (pTagColList[idx].colId == colId) {
      pFound = &pTagColList[idx];
    }

    idx += 1;
  }

  return pFound;
}

2461
/*
 * Load the tag values of `pTable` into the function contexts of the query.
 *
 * - A query with the single output function TSDB_FUNC_TS_COMP only needs the
 *   tag column named by the function's first argument; it is stored in the
 *   tag of pCtx[0].
 * - Otherwise every output expression that refers to a tag column gets its
 *   tag value set. In addition, when a ts buffer (pTSBuf) exists and the first
 *   expression is a TS/PRJ function on the primary timestamp column, the tag
 *   named by that expression's first argument is stored in the tag of pCtx[0].
 *
 * The tag column named by a function argument must exist in
 * pQuery->tagColList; this is asserted because the lookup helper returns NULL
 * for an unknown column id and the result is dereferenced.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
    assert(pColInfo != NULL);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
  } else {
    // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];

      // ts_comp column required the tag value for join filter
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        continue;
      }

      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }

    // set the join tag for first column
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
        pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      assert(pFuncMsg->numOfParams == 1);

      int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
      SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
      assert(pColInfo != NULL);

      doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
          pRuntimeEnv->pCtx[0].tag.i64Key);
    }
  }
}

/*
 * Feed one window result into the merge contexts.
 *
 * First pass, for every output column: when `mergeFlag` is false a new output
 * row is started (the output pointer advances by one row, the result info is
 * reset and the aggregate's init() is called). The input pointer is then set
 * to the column's position inside the window result page, and for
 * TSDB_FUNC_TAG / TSDB_FUNC_TAG_DUMMY columns the tag variant is rebuilt from
 * that input.
 *
 * Second pass: call distMergeFunc() for every column except
 * TSDB_FUNC_TAG_DUMMY. All contexts are prepared before any merge runs.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  tFilePage      *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *ctx = &pCtx[i];
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      // start a new output row
      ctx->aOutputBuf = ctx->aOutputBuf + ctx->outputBytes;
      ctx->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(ctx->resultInfo);
      aAggs[functionId].init(ctx);
    }

    ctx->hasNull = true;
    ctx->nStartQueryTimestamp = timestamp;
    ctx->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);

    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&ctx->tag);

    int32_t type = ctx->outputType;
    bool    isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);
    if (isVarLen) {
      tVariantCreateFromBinary(&ctx->tag, varDataVal(ctx->aInputElemBuf), varDataLen(ctx->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&ctx->tag, ctx->aInputElemBuf, ctx->inputBytes, ctx->inputType);
    }
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtx[i]);
    }
  }
}

2548
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2617
/*
 * Debug helper: dump `numOfRows` rows of the intermediate result held in the
 * per-column pages `pdata` to stdout, one line per row and one tab-terminated
 * cell per output column. Columns whose expression type is not handled by the
 * switch print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo* pExpr = &pQuery->pSelectExpr[col];

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, pdata[col]->data + pExpr->bytes * row, type);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)(pdata[col]->data + pExpr->bytes * row));
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2652 2653 2654
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2655 2656 2657 2658 2659
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2660

2661 2662
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2663

2664 2665
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2666

2667 2668 2669 2670
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2671

2672 2673 2674 2675
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2676

H
hjxilinx 已提交
2677
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2678
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2679
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2680

H
Haojun Liao 已提交
2681
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2682
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2683

H
hjxilinx 已提交
2684
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2685
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2686
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2687

H
Haojun Liao 已提交
2688
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2689
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2690

2691 2692 2693
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2694

2695 2696 2697
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2698
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group yields
 * at least one result page or all groups have been processed. groupIndex is
 * advanced past every group that was merged.
 *
 * Marks the super-table query as over when the last group has been merged and
 * every result page has been consumed. The elapsed time is added to the
 * first-stage merge time of the query summary.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfPages = TSDB_CODE_SUCCESS;

  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (; pQInfo->groupIndex < numOfGroups; ) {
    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    numOfPages = mergeIntoGroupResultImpl(pQInfo, group);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    if (numOfPages > 0) {
      // this group generates at least one result, return results
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo* pResInfo = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pResInfo->pos.pageId == pResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the merged results of the current group from the disk-based result
 * buffer into the query output buffer (pQuery->sdata), at most
 * pQuery->rec.capacity rows per call.
 *
 * When the previous group has been returned completely, the next group is
 * merged first; the function returns without copying if that merge fails or
 * if no group is left (the query is then marked as over).
 *
 * The read position (page id and row id inside the page) is kept in
 * pQInfo->groupResInfo.pos, so that a page which does not fit entirely is
 * resumed from its first unread row on the next call. On return
 * pQuery->rec.rows holds the number of rows copied.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pos.rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffer
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pos.pageId; j < (int32_t)size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->pos.rowId < pData->num);

    // first unread row of this page; non-zero when the page was only partially returned by the previous call
    int32_t startRow = (int32_t)pGroupResInfo->pos.rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // the output buffer cannot hold the rest of this page: copy what fits and stay on this page
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->pos.rowId += numOfCopiedRows;
      done = true;
    } else {
      // the remaining rows of this page fit: copy only those and move on to the next page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pos.pageId += 1;
      pGroupResInfo->pos.rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page; skip the rows that were already returned
      char *  pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + (size_t)startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2805
/*
 * Return the number of results held by a window result: the numOfRes of the
 * first output column that is not a ts/tag/tagprj function and has a positive
 * count, or 0 when no such column exists.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool decidesOutput =
        !(functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);

    if (decidesOutput) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }

    col += 1;
  }

  return 0;
}

2828
/*
 * Merge the window results of all tables in `pGroup` into the result pages of
 * the group.
 *
 * - No table of the group has results: returns 0.
 * - Exactly one table has results: nothing is merged; the group result info is
 *   pointed at that table's own pages (group id = table id) and the number of
 *   those pages is returned.
 * - Otherwise the window results are merged in timestamp order through a loser
 *   tree; results sharing a timestamp are combined with doMerge(). The output
 *   is flushed page by page with flushFromResultBuf() and the number of group
 *   data pages is returned.
 *
 * Returns -1 when flushing fails. Memory exhaustion and a killed query are
 * reported by longjmp-ing to pRuntimeEnv->env. Every temporary allocation is
 * released on all exit paths.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    taosTFree(posList);
    taosTFree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;

      // remember the pages of a table that really has results; used by the single-table shortcut below
      pageList = list;
      tid = TSDB_TABLEID(item->pTable)->tid;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    taosTFree(posList);
    taosTFree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  int32_t ret = 0;
  bool    killed = false;
  int64_t endt = 0;

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  char* buf = calloc(1, pRuntimeEnv->interBufSize);

  // pTree still being NULL means the tree could not be created
  if (pTree == NULL || pResultInfo == NULL || buf == NULL) {
    taosTFree(pTableList);
    taosTFree(posList);
    taosTFree(pTree);
    taosTFree(pResultInfo);
    taosTFree(buf);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      killed = true;
      goto _cleanup;
    }

    // the source holding the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window result, skip it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

            ret = -1;
            goto _cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      ret = -1;
      goto _cleanup;
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  ret = pQInfo->groupResInfo.numOfDataPages;

_cleanup:
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);

  taosTFree(pResultInfo);
  taosTFree(buf);

  if (killed) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  return ret;
}

H
Haojun Liao 已提交
3013 3014
/*
 * Move the rows currently held in the query output buffer (pQuery->sdata)
 * into newly obtained pages of the disk-based result buffer, registered under
 * pGroupResInfo->groupId. The row count is taken from sdata[0]->num; each new
 * page receives at most pResultBuf->numOfRowsPerPage rows and increments
 * pGroupResInfo->numOfDataPages.
 *
 * Returns TSDB_CODE_SUCCESS.
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000.
  int32_t pageId = -1;
  int32_t rowsPerPage = pResultBuf->numOfRowsPerPage;

  int32_t pending = (int32_t) pQuery->sdata[0]->num;

  for (int32_t flushed = 0; pending > 0; ) {
    int32_t rows = pending;
    if (rows > rowsPerPage) {
      rows = rowsPerPage;
    }

    assert(rows > 0);

    // get the output buffer page
    tFilePage *pPage = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    pPage->num = rows;

    // copy column by column into the page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char* from = ((char *) pQuery->sdata[col]->data) + flushed * bytes;
      char* to = pPage->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;

      memcpy(to, from, (size_t)(pPage->num * bytes));
    }

    flushed += rows;
    pending -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output: for every output column, bind the function context
 * to its entry of `pResultInfo`, reset size/startOffset, and empty the
 * column's output page. The output pointer is placed one row *before* the
 * start of the page data; doMerge() advances it by one row before a new row
 * is written.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    SQLFunctionCtx *ctx = &pCtx[col];
    tFilePage      *pColumn = pQuery->sdata[col];

    ctx->aOutputBuf = pColumn->data - ctx->outputBytes;
    ctx->size = 1;
    ctx->startOffset = 0;
    ctx->resultInfo = &pResultInfo[col];

    pColumn->num = 0;

    col += 1;
  }
}

3062 3063 3064 3065
/**
 * Flip the scan state of one table so that the next scan walks its time window backwards.
 *
 * The window end is first clipped to the last visited key (unless the scan never moved off
 * the window start), then skey/ekey are swapped, lastKey restarts at the new skey, the
 * ts-cursor order is switched and its vgroupIndex reset, and the window-result cursor is
 * placed on the last window.
 *
 * @param pQuery           query; its order field supplies the step used for clipping
 * @param pTableQueryInfo  per-table state to update; a NULL pointer is ignored
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate: win.ekey is expected to lie on the far side of (lastKey + step) for the
  // current direction; the corresponding assertions were disabled in the original code.

  // lastKey still equal to skey means no results were produced: leave the window untouched
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  // reverse the window and restart from its (new) beginning
  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  // reverse the cursor direction and invalidate its position
  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/**
 * For every closed window in pWindowResInfo, decide per output column whether the function
 * still has work to do in the reverse scan.
 *
 * FIRST/FIRST_DST under ascending order and LAST/LAST_DST under descending order are marked
 * not complete; every other function except TS and TAG is marked complete. TS and TAG
 * columns are left as they are.
 *
 * @param pQInfo          query info, used to reach the query's select expressions
 * @param pWindowResInfo  window results to update
 * @param order           TSDB_ORDER_ASC or TSDB_ORDER_DESC, compared against the function kind
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    // windows that are not closed are skipped
    if (!getTimeWindowResStatus(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functId = pQuery->pSelectExpr[col].base.functionId;

      bool firstOnAsc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
      bool lastOnDesc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

      if (firstOnAsc || lastOnDesc) {
        pWinRes->resultInfo[col].complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pWinRes->resultInfo[col].complete = true;
      }
    }
  }
}

3118 3119
/**
 * Mark which output functions remain active for the reverse scan.
 *
 * For group-by-normal-column and interval queries the decision is made per closed window via
 * disableFuncInReverseScanImpl(). Otherwise it is made directly on each function context:
 * FIRST/FIRST_DST under ascending order and LAST/LAST_DST under descending order are marked
 * not complete, every other function except TS and TAG is marked complete.
 *
 * @param pQInfo  query info whose runtime environment is updated
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
    disableFuncInReverseScanImpl(pQInfo, pWindowResInfo, order);
    return;
  }

  // simple (single result row) table query
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    // resultInfo is NULL, means no data checked in previous scan
    if (pCtx->resultInfo == NULL) {
      continue;
    }

    int32_t functId = pQuery->pSelectExpr[col].base.functionId;

    bool firstOnAsc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
    bool lastOnDesc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

    if (firstOnAsc || lastOnDesc) {
      pCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/**
 * Reverse the scan range of every table in every table group and propagate the new start key
 * to the matching STableKeyInfo entry in pQInfo->tableGroupInfo.pGroupList.
 *
 * @param pQInfo  query info holding the table groups and the table-key groups
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pQueryInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyInfoList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pQueryInfoList);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pQueryInfoList, k);
      updateTableQueryInfoForReverseScan(pQuery, pTableQueryInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyInfoList, k);
      pKeyInfo->lastKey = pTableQueryInfo->lastKey;

      // both lists are expected to describe the same table at the same index
      assert(pTableQueryInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3169
/**
 * Apply SWITCH_ORDER to the order field of every output function context.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
    ++col;
  }
}

H
Haojun Liao 已提交
3176
/**
 * Allocate the per-column SResultInfo array of a result row together with its intermediate
 * buffer in one zero-filled block, and initialise the row's page position to {-1, -1}.
 *
 * Layout of the allocation: numOfOutput SResultInfo entries followed by interBufSize bytes;
 * the trailing region is handed to setWindowResultInfo() as the intermediate buffer.
 *
 * @param pQuery         query supplying the number of output columns
 * @param pResultRow     result row that receives the allocation
 * @param isSTableQuery  forwarded to setWindowResultInfo()
 * @param interBufSize   number of bytes reserved after the SResultInfo array
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when calloc fails
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // set the intermediate result output buffer: it starts right behind the SResultInfo array
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/**
 * Rewind every output function context to the start of its query output buffer, reset the
 * runtime result-info slots, zero the output buffers and re-run function initialisation.
 *
 * @param pRuntimeEnv  runtime environment whose contexts and buffers are reset
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    pColCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * attach a freshly reset result info to the context;
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(&pRuntimeEnv->resultInfo[col]);
    pColCtx->resultInfo = &pRuntimeEnv->resultInfo[col];

    // top/bottom/diff write their timestamps through the first column's output buffer
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool usesTsOutput =
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (usesTsOutput) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // clear the whole column buffer (bytes per row * row capacity)
    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/**
 * Advance the output cursors of the function contexts by `output` rows and reset their
 * result info, so that the next batch is written behind the rows already produced.
 *
 * Must not be used with the DIFF function (asserted).
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 * @param output       number of rows to move forward
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // only functions flagged as "outer forward" get their value cursor moved here
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pRuntimeEnv->pCtx[col].aOutputBuf += pRuntimeEnv->pCtx[col].outputBytes * output;
    }

    /*
     * NOTE: top/bottom assign the values of the first output column (timestamp) themselves,
     * writing through ptsOutputBuf, so that cursor has to be forwarded as well.
     * The diff function is handled in the multi-output function path.
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pRuntimeEnv->pCtx[col].ptsOutputBuf = (char*)pRuntimeEnv->pCtx[col].ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pRuntimeEnv->pCtx[col].resultInfo);
  }
}

/**
 * Reset currentStage of every function context to 0 and call the function's init routine for
 * each context whose result info has not been initialised yet.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    pRuntimeEnv->pCtx[col].currentStage = 0;

    SResultInfo* pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (!pResInfo->initialized) {
      int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
      aAggs[functionId].init(&pRuntimeEnv->pCtx[col]);
    }
  }
}

3264
/**
 * Apply the remaining query OFFSET to the rows currently held in the output buffers.
 *
 * - No rows, or no offset left: nothing happens.
 * - rows <= offset: the whole batch is discarded, the offset is reduced by the number of
 *   rows, the output contexts are reset and the QUERY_RESBUF_FULL flag is cleared.
 * - rows > offset: the first `offset` rows are dropped by shifting the remaining rows to the
 *   front of each column buffer, the output cursors are placed behind them, and the number
 *   of results is updated.
 *
 * @param pRuntimeEnv  runtime environment owning the query and its output buffers
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows > pQuery->limit.offset) {
    // partial skip: keep the tail of the batch
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, pQuery->rec.rows);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      // source and destination overlap, hence memmove
      memmove(pQuery->sdata[col]->data, (char*)pQuery->sdata[col]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));

      // next write position is right behind the rows that were kept
      pRuntimeEnv->pCtx[col].aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
    return;
  }

  // full skip: every row of this batch is consumed by the offset
  qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
      pQuery->limit.offset - pQuery->rec.rows);

  pQuery->limit.offset -= pQuery->rec.rows;
  pQuery->rec.rows = 0;

  resetCtxOutputBuf(pRuntimeEnv);

  // clear the buffer full flag if exists
  CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
}

/**
 * Update the status word of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed into the status
 * after the QUERY_NOT_COMPLETED flag has been cleared, because that flag is not compatible
 * with any other status.
 *
 * @param pQuery  query to update
 * @param status  status flag(s) to set
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/**
 * Move every output function (except TS) to its next step and report whether any of them is
 * still not complete, i.e. whether the data blocks have to be scanned once more.
 *
 * For group-by-normal-column and interval queries this is done for each closed window
 * result, after binding the contexts to that window; otherwise it is done once on the
 * runtime contexts.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts and window results
 * @return true when at least one function reports !complete after xNextStep()
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool toContinue = false;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    // single result set: step the runtime contexts directly
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pSelectExpr[col].base.functionId;
      if (functId != TSDB_FUNC_TS) {
        aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
        SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

        if (!pResInfo->complete) {
          toContinue = true;
        }
      }
    }

    return toContinue;
  }

  // one result set per window: step the functions of every closed window
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (pResult->closed) {
      setWindowResOutputBuf(pRuntimeEnv, pResult);

      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int16_t functId = pQuery->pSelectExpr[col].base.functionId;
        if (functId != TSDB_FUNC_TS) {
          aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
          SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

          if (!pResInfo->complete) {
            toContinue = true;
          }
        }
      }
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3360
/**
 * Take a snapshot of the scan state before the data blocks are scanned.
 *
 * The snapshot records the query status, the current window-result index, `start` as last
 * key, the query window (w) and the current table's window with its skey replaced by
 * `start` (curWindow).
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key of the scan; asserted not to lie beyond the table's lastKey
 *                     in scan direction
 * @return the snapshot by value
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pTableQueryInfo->lastKey) : (start >= pTableQueryInfo->lastKey));

  SQueryStatusInfo snapshot = {0};
  snapshot.status      = pQuery->status;
  snapshot.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  snapshot.lastKey     = start;

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pTableQueryInfo->win);

  // the scan begins at `start`, not at the table window's original skey
  snapshot.curWindow.skey = start;

  return snapshot;
}

3381 3382 3383 3384
/**
 * Switch the runtime environment into reverse-scan mode.
 *
 * Steps, in order: save the ts-buffer cursor into pStatus and reverse the ts-buffer
 * direction; set the query window to the reversed pStatus->curWindow; flip the query order;
 * raise the reverse-scan flag; reset the query status; flip the order of all function
 * contexts; select the functions that stay active; reverse the per-table ranges; and finally
 * replace the secondary tsdb query handle with one built for the reversed window.
 *
 * Does not return normally when the new query handle cannot be created: it longjmp()s to
 * pRuntimeEnv->env with terrno.
 *
 * @param pRuntimeEnv  runtime environment to switch
 * @param pStatus      snapshot taken before the scan; receives the saved ts-buffer cursor
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor, then turn the ts buffer around and step to its next position
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    bool moved = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(moved);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the flip the window must be consistent with the new direction
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3430 3431
/**
 * Undo setEnvBeforeReverseScan(): flip the query and context order back, restore the
 * ts-buffer cursor, return to master-scan mode and restore last key, status and time
 * windows from the snapshot.
 *
 * @param pRuntimeEnv  runtime environment to restore
 * @param pStatus      snapshot holding the values to restore (cur, lastKey, status, w)
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  // back to the original scan direction
  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  // restore the ts-buffer position and align its direction with the query
  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the scan progress and status recorded in the snapshot
  pTableQueryInfo->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // both the table window and the query window go back to the saved query window
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

H
Haojun Liao 已提交
3452 3453 3454 3455 3456 3457 3458
/**
 * Reset the lastKey of the single table in the group info to the start of the query
 * condition's time window. Only valid when the group info holds exactly one table (asserted).
 *
 * @param pTableGroupInfo  table group info; its first group's first entry is updated
 * @param pCond            query condition supplying twindow.skey
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstKey = taosArrayGet(pFirstGroup, 0);

  pFirstKey->lastKey = pCond->twindow.skey;
}

3459
/**
 * Scan the data blocks of the current table, repeating the scan as long as some output
 * function requires another pass, and finish with a reverse scan when the query needs one.
 *
 * Master scan: after the pass the snapshot is refreshed with the query status and the last
 * visited key; the window to repeat is clipped to the range actually visited.
 * Repeat scan: a new secondary query handle is created for the clipped window and the pass
 * is run again with scanFlag == REPEAT_SCAN.
 *
 * Does not return normally if a query handle cannot be created (longjmp with terrno) or if
 * the query is killed between passes (longjmp with TSDB_CODE_TSC_QUERY_CANCELLED after
 * finalizeQueryResult()).
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        key at which the scan of the current table begins
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (pRuntimeEnv->scanFlag == MASTER_SCAN) {
      saved.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (saved.lastKey != pTableQueryInfo->lastKey) {
        saved.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      saved.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    // another pass is required: rebuild the secondary handle for the visited window
    STsdbQueryCond repeatCond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(repeatCond.twindow, saved.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &repeatCond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &repeatCond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    // restart from the window index recorded before the first pass
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        repeatCond.twindow.skey, repeatCond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3540
/**
 * Call the finalize routine of every output function.
 *
 * For group-by-normal-column and interval queries the routines are run once per closed
 * window result (for group-by-normal-column all windows are closed first), and the row count
 * of each window is recorded. Otherwise they are run once on the runtime contexts.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts and window results
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pWinRes = &pWindowResInfo->pResult[w];
    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWinRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/**
 * Tell whether the select list contains at least one function other than TS, TAG and TAGPRJ.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true as soon as such a function is found, false otherwise
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliaryOnly =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliaryOnly) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3588
/**
 * Initialise a STableQueryInfo in caller-provided storage.
 *
 * The time window and table pointer are stored, lastKey starts at win.skey and the ts-cursor
 * vgroupIndex is set to -1. For interval and group-by-normal-column queries the per-table
 * window result info is initialised as well; other queries leave it untouched.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table handle to associate with the info
 * @param win          time window to scan for this table
 * @param buf          storage in which the STableQueryInfo is set up
 * @return `buf` as STableQueryInfo, or NULL when the window result info cannot be initialised
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // only interval/groupby queries need a per-table window result set
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    int32_t initialSize = 16;
    int32_t initialThreshold = 100;

    int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
    if (code != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3613
/**
 * Release the window result info held by a table query info. The STableQueryInfo storage
 * itself is not freed here. A NULL pointer is ignored.
 *
 * @param pTableQueryInfo  table query info to clean up, may be NULL
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3624
 * @param pDataBlockInfo
3625
 */
H
Haojun Liao 已提交
3626
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3627
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3628 3629 3630
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3631 3632
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3633 3634 3635 3636

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3637

H
Haojun Liao 已提交
3638 3639 3640
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3641

3642 3643
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3644 3645 3646
  if (pWindowRes == NULL) {
    return;
  }
3647

3648 3649 3650 3651 3652
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3653
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3654 3655 3656 3657
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3658

H
Haojun Liao 已提交
3659 3660
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3661 3662 3663 3664
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3665
/**
 * Point every function context at the storage of one window result: the output buffer
 * inside the window's result page, the timestamp output buffer (top/bottom/diff only) and
 * the window's per-column result info. The super-table flag of each result info is refreshed.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts and the result buffer
 * @param pResult      window result to bind; its pos.pageId selects the result page
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    // these functions emit timestamps through the first column's output buffer
    bool usesTsOutput =
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (usesTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3692 3693
/**
 * Bind the function contexts to one window result and initialise the functions that have
 * not been initialised for that window yet.
 *
 * Each context always receives the window's result info. Contexts whose result info is both
 * initialised and complete are otherwise left alone; the rest get their output buffers set,
 * currentStage reset to 0, the super-table flag refreshed and, when not yet initialised,
 * their init routine called.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts and the result buffer
 * @param pResult      window result to bind; its pos.pageId selects the result page
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    // nothing more to compute for this column in this window
    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
    pCtx->currentStage = 0;

    // these functions emit timestamps through the first column's output buffer
    int32_t functionId = pCtx->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;     // set super table query flag

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3726
/**
 * Load the tag values of a table into the runtime environment and, when a ts buffer is in
 * use, position it for that table.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the tag of the first function
 * context is copied into the table info, the ts buffer is positioned on the start element
 * for that tag, and the resulting cursor is remembered. On later visits the remembered
 * cursor is restored.
 *
 * @param pQInfo           query info
 * @param pTable           table handle passed to setTagVal()
 * @param pTableQueryInfo  per-table state holding the tag and the ts-buffer cursor
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    // table seen before: continue from its saved cursor
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  tVariantAssign(&pTableQueryInfo->tag, &pRuntimeEnv->pCtx[0].tag);
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pTableQueryInfo->tag);

  // keep the cursor info of current meter
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3756
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3757 3758
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3759
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3760

3761 3762 3763
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3764
    pTableQueryInfo->win.skey = key;
3765
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3766

3767 3768 3769 3770 3771
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3772

3773 3774 3775 3776 3777 3778
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3779
    STimeWindow     w = TSWINDOW_INITIALIZER;
3780
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3781

H
Haojun Liao 已提交
3782 3783
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3784
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3785
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3786

3787 3788
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3789
        assert(win.ekey == pQuery->window.ekey);
3790
      }
3791

3792
      pWindowResInfo->prevSKey = w.skey;
3793
    }
3794

3795
    pTableQueryInfo->queryRangeSet = 1;
3796
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3797 3798 3799 3800
  }
}

/**
 * Tell whether any function in the select list carries the TSDB_FUNCSTATE_NEED_TS flag.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true when at least one function has the flag set, false otherwise
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) {
      return true;
    }

    col++;
  }

  return false;
}

/**
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is needed when
 *   1. the current table's lastKey or the query window's ekey lies inside the block's time
 *      window, so the precise position has to be located, or
 *   2. some function in the select list requires timestamps (see requireTimestamp()).
 *
 * @param pQuery          query being executed
 * @param pDataBlockInfo  info of the block under consideration; only its window is read
 * @return true when the timestamp column must be loaded
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *w = &pDataBlockInfo->window;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  // the scan position lies inside this block
  if (pTableQueryInfo->lastKey >= w->skey && pTableQueryInfo->lastKey <= w->ekey) {
    return true;
  }

  // the end of the query window lies inside this block
  if (pQuery->window.ekey >= w->skey && pQuery->window.ekey <= w->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3824
/**
 * Copy rows of the closed window results into the query output buffer
 * (pQuery->sdata), resuming from pQInfo->groupIndex / groupResInfo.pos.rowId.
 *
 * Copying stops once pQuery->rec.capacity rows have been written. When a window
 * result does not fit completely, only the part that fits is copied and
 * groupResInfo.pos.rowId records how many of its rows were consumed.
 *
 * @param pQInfo       query handle; groupIndex and groupResInfo are updated
 * @param pResultInfo  window result set to read from
 * @param orderType    TSDB_ORDER_ASC walks the results forward, any other value
 *                     walks them backward
 * @return number of rows copied into the output buffer
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;
  SGroupResInfo    *pGroupRes = &pQInfo->groupResInfo;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  bool    forward = (orderType == TSDB_ORDER_ASC);
  int32_t step = forward ? 1 : -1;
  int32_t cur = forward ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);
  int32_t copied = 0;

  for (; cur >= 0 && cur < numOfClosed; cur += step) {
    SWindowResult *pWin = &pWindows[cur];

    // nothing to copy for an empty window result, move on to the next one
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupRes->pos.rowId = 0;
      continue;
    }

    int32_t rows = pWin->numOfRows - pGroupRes->pos.rowId;
    int32_t srcOffset = pGroupRes->pos.rowId;

    if (rows <= pQuery->rec.capacity - copied) {
      // the remaining rows of this window result fit: it is fully consumed
      pGroupRes->pos.rowId = 0;
      pQInfo->groupIndex += 1;
    } else {
      // output space is insufficient: copy what fits and remember the position
      rows = (int32_t) pQuery->rec.capacity - copied;
      pGroupRes->pos.rowId += rows;
    }

    tFilePage *pPage = getResBufPage(pEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * bytes;
      char *src = getPosInResultPage(pEnv, col, pWin, pPage);
      memcpy(dst, src + srcOffset * bytes, bytes * rows);
    }

    copied += rows;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
3901
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
3902
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
3903

3904
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
3905
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
3906

3907
  pQuery->rec.rows += numOfResult;
3908

3909
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
3910 3911
}

H
Haojun Liao 已提交
3912
/**
 * For non-interval queries, raise numOfRows of every window result in
 * pRuntimeEnv->windowResInfo to the largest numOfRes reported by its output
 * columns. Columns produced by TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ
 * are ignored. Interval queries are left untouched.
 *
 * @param pRuntimeEnv  runtime environment holding the window results
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pInfo->size; ++w) {
    SWindowResult *pWin = &pInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;
      bool    ignored = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

      if (!ignored) {
        pWin->numOfRows = (uint16_t)(MAX(pWin->numOfRows, pWin->resultInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
3934
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
3935
    SArray *pDataBlock, __block_search_fn_t searchFn) {
3936
  SQuery *         pQuery = pRuntimeEnv->pQuery;
3937
  STableQueryInfo* pTableQueryInfo = pQuery->current;
3938

3939
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3940
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3941

H
Haojun Liao 已提交
3942
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
3943
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3944
  } else {
3945
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3946 3947 3948
  }
}

H
Haojun Liao 已提交
3949
/**
 * Check whether a table query still has results to hand back to the client.
 *
 * - If a positive limit is set and rec.total already reached it: no.
 * - Fill queries (other than point interpolation): yes if the fill info still
 *   holds rows; otherwise, once the query is completed, yes only if filling up
 *   to the end of the query window would generate rows.
 * - Other queries: yes if the query is completed, it groups by a normal column
 *   or is an interval query, and window results exist.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return true if more results remain
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFill = pRuntimeEnv->pFillInfo;

  bool limitReached = (pQuery->limit.limit > 0) && (pQuery->rec.total >= pQuery->limit.limit);
  if (limitReached) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // there are results waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // rows not yet returned to the client must be filled and delivered first
  int32_t remain = taosNumOfRemainRows(pFill);
  if (remain > 0) {
    return true;
  }

  /*
   * No buffered rows are left. While the query is still running, gaps between
   * result blocks are handled after the next data block has been retrieved, so
   * nothing remains for now. After completion, rows may still be generated up
   * to the end of the query time window.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFill, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/**
 * Serialize the query result into the response buffer and update the query
 * status.
 *
 * Layout written to data:
 *   1. for every output column, bytes * numOfRows bytes of column data;
 *   2. the number of entries in pQInfo->arrTableIdInfo (int32, network order);
 *   3. one STableIdInfo per entry with uid/key converted by htobe64 and tid by
 *      htonl.
 *
 * The write position after the column data is arbitrary, so the count and the
 * STableIdInfo records are staged in local variables and copied with memcpy
 * instead of being stored through a casted pointer (which may be misaligned).
 *
 * When the query is completed, its status is switched to QUERY_OVER if the
 * super-table query is over, or, for a table query, if no results remain.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy from each output column
 * @param data       destination buffer; assumed large enough by the caller
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4026
/**
 * Generate filled result rows into pQuery->sdata and apply the remaining query
 * offset to them.
 *
 * Each round generates up to rec.capacity rows:
 *  - offset == 0: the rows are returned as they are;
 *  - offset smaller than the generated count: the first `offset` rows are
 *    dropped by shifting the data in pDst to the front, offset becomes 0 and the
 *    number of surviving rows is returned;
 *  - otherwise the whole batch is discarded, the offset is reduced by its size
 *    and another round starts, unless no results remain, in which case 0 is
 *    returned.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         output pages whose content is shifted when rows are dropped
 * @param numOfFilled  not used by this function
 * @return number of rows available in the output buffer
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFill = pRuntimeEnv->pFillInfo;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  for (;;) {
    int32_t generated = (int32_t)taosGenerateDataBlock(pFill, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFill->numOfRows, generated);
      return generated;
    }

    if (pQuery->limit.offset >= generated) {
      // the complete batch is consumed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFill->numOfRows, generated, pQuery->limit.offset, 0,
          pQuery->limit.offset - generated);

      pQuery->limit.offset -= generated;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    // the offset ends inside this batch: drop the leading rows and return the rest
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFill->numOfRows, generated, pQuery->limit.offset, generated - pQuery->limit.offset, 0);

    generated -= (int32_t)pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    // todo refactor move to the beginning of buffer
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
              generated * pQuery->pSelectExpr[col].bytes);
    }

    pQuery->limit.offset = 0;
    return generated;
  }
}

4071
/**
 * Add the first-stage merge time to the elapsed time of the query and write the
 * cost summary (timing, block/row counters, internal size, number of time
 * windows) to the debug log.
 *
 * @param pQInfo  query handle whose runtimeEnv.summary is reported
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // the merge time is part of the total elapsed time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo,
         pCost->elapsedTime,
         pCost->firstStageMergeTime,
         pCost->totalBlocks,
         pCost->loadBlockStatis,
         pCost->loadBlocks,
         pCost->totalRows,
         pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: internal size:%"PRId64"B, numOfWin:%"PRId64,
         pQInfo,
         pCost->internalSupSize,
         pCost->numOfTimeWindows);
}

4087 4088
/**
 * Consume the remaining query offset inside the given data block.
 *
 * If the offset equals the number of rows in the block, the whole block is
 * skipped: lastKey of the current table is moved one step past the block
 * boundary and the offset is cleared. Otherwise pQuery->pos is placed on the
 * first row after the skipped ones, lastKey is set to that row's timestamp, the
 * offset is cleared and the query functions are applied to the block.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   description of the block currently under the cursor
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  bool    ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the current block is skipped completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    if (ascending) {
      pCurrent->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pCurrent->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  if (ascending) {
    pQuery->pos = (int32_t)pQuery->limit.offset;
  } else {
    pQuery->pos = pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // column 0 of the loaded block is read as the array of timestamps
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // restart from the first row that is not skipped; the offset is used up
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4122

4123 4124 4125 4126 4127
/**
 * Skip the rows covered by the query offset by walking data blocks.
 *
 * Nothing is done when the offset is not positive or when the query has filter
 * columns. Blocks holding fewer rows than the remaining offset are skipped as a
 * whole; the first block that can satisfy the offset is handed to
 * updateOffsetVal() and the walk stops. A killed query is finalized and aborted
 * via longjmp, and a non-success terrno after the walk also triggers longjmp.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->numOfFilterCols > 0 || pQuery->limit.offset <= 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    if (pQuery->limit.offset <= blockInfo.rows) {
      // the start position lies in the current block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is covered by the offset: step over it
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY boundary = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey = boundary + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4162

H
Haojun Liao 已提交
4163
/**
 * Apply the query offset to an interval query by skipping time windows.
 *
 * *start is initialised with lastKey of the current table. Nothing is skipped
 * when the offset is not positive or when the query uses filter columns, a
 * timestamp buffer or fill info. Otherwise data blocks are walked and the offset
 * is decremented once for every window that ends within the current block.
 * Because holes may exist in the data, the offset cannot be applied as
 * offset * interval; windows are advanced one at a time.
 *
 * When the offset reaches zero:
 *  - if the next window starts in the current block, the block is loaded, the
 *    query start (table window, query window, *start) is moved to the first row
 *    of that window and the query functions are applied to the block;
 *  - otherwise *start, the query window start and prevSKey are set to the start
 *    of the next window.
 *
 * A non-success terrno after the walk triggers longjmp.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        out: timestamp from which the query should continue
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pWinRes = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;
  bool             ascending = QUERY_IS_ASC_QUERY(pQuery);

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!ascending) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pWinRes->startTime = pQuery->window.skey;
      pWinRes->prevSKey = aligned.skey;
    } else if (pWinRes->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);
      pWinRes->startTime = aligned.skey;
      pWinRes->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWinRes, pWinRes->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // a window ending within the current block consumes one unit of offset
      bool winEndsInBlock = ascending ? (win.ekey <= blockInfo.window.ekey) : (win.ekey >= blockInfo.window.skey);
      if (winEndsInBlock) {
        pQuery->limit.offset -= 1;
        pWinRes->prevSKey = win.skey;
      }

      STimeWindow next = win;
      GET_NEXT_TIMEWINDOW(pQuery, &next);

      bool nextInBlock = ascending ? (next.skey <= blockInfo.window.ekey) : (next.ekey >= blockInfo.window.skey);

      if (pQuery->limit.offset == 0) {
        if (!nextInBlock) {
          // the next window belongs to a following block: only move the start keys
          *start = next.skey;
          pQuery->window.skey = next.skey;
          pWinRes->prevSKey = next.skey;
          return true;
        }

        // load the data block and check data remaining in current data block
        // TODO optimize performance
        SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

        next = win;
        int32_t startPos =
            getNextQualifiedWindow(pRuntimeEnv, &next, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
        assert(startPos >= 0);

        // set the abort info
        pQuery->pos = startPos;

        // reset the query start timestamp
        TSKEY *tsList = (TSKEY *)pTsCol->pData;
        pCurrent->win.skey = tsList[startPos];
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pWinRes->prevSKey = next.skey;
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      if (!nextInBlock) {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still starts in the current data block: load the
       * block and locate the start position of that window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually
       * required time window resides in current data block.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      next = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      pCurrent->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pWinRes->prevSKey = next.skey;
      win = next;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

B
Bomin Zhang 已提交
4289
/**
 * Create the tsdb query handle (pRuntimeEnv->pQueryHandle) for the query.
 *
 * No handle is created, and TSDB_CODE_SUCCESS is returned, when the query only
 * reads tags, or when it is a super-table query that is neither an interval
 * query nor a fixed-output query.
 *
 * The query condition is built from the query's order, column list and time
 * window. For an ascending query on exactly one table that is not a super-table,
 * interval, group-by-normal-column or fixed-output query, the time window of
 * that table replaces the query window in the condition.
 *
 * The handle type depends on the query: last-row handle (the query window and
 * every table's window/lastKey are then reset from the condition window),
 * external-window handle for point interpolation, or the regular table handle.
 *
 * @param tsdb           tsdb instance handed to the tsdbQuery* functions
 * @param pQInfo         query handle
 * @param isSTableQuery  true if the query targets a super table
 * @return terrno as left by the handle creation, TSDB_CODE_SUCCESS if untouched
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order   = pQuery->order.order,
    .colList = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  bool plainSingleTableScan = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv));

  if (plainSingleTableScan) {
    SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    } else {
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    }

    return terrno;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  // update the query time window
  pQuery->window = cond.twindow;

  // propagate the window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, t);

      pTableInfo->win = pQuery->window;
      pTableInfo->lastKey = pTableInfo->win.skey;
    }
  }

  return terrno;
}

4349 4350 4351
/**
 * Build the fill column descriptors for all output columns of the query.
 *
 * Every descriptor takes bytes, type and function id from the matching select
 * expression, the fill value from pQuery->fillVal, and an offset equal to the
 * accumulated byte size of the preceding columns. All columns are flagged as
 * TSDB_COL_NORMAL.
 *
 * @param pQuery  query providing the select expressions and fill values
 * @return array of pQuery->numOfOutput descriptors allocated with calloc (the
 *         caller takes the pointer), or NULL if the allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t rowOffset = 0;

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo    *pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo *pCol = &pFillCol[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    rowOffset += pExpr->bytes;
  }

  return pFillCol;
}

4374
/**
 * Prepare a query for execution: create the tsdb query handle, set up the
 * runtime environment, allocate the result buffer / window result info where
 * the query type needs them, and create the fill info for fill queries.
 *
 * Steps, in order:
 *  1. copy the precision from the tsdb configuration and cache query traits
 *     (top/bottom query, tag output);
 *  2. adjust the scan limitation and the scan order;
 *  3. create the query handle (setupQueryHandle);
 *  4. record tsdb, vgId, timestamp buffer and query flags in the environment;
 *  5. create the runtime environment (setupQueryRuntimeEnv);
 *  6. for super-table queries that do not only read tags, and for table queries
 *     that group by a normal column or use intervals, create the disk-based
 *     result buffer and initialise the window result info;
 *  7. for fill queries (except point interpolation), create the fill info.
 *
 * On success the query status is set to QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query handle to initialise
 * @param pTsBuf         timestamp buffer, may be NULL
 * @param tsdb           tsdb instance the query runs against
 * @param vgId           vnode group id stored in pQInfo
 * @param isSTableQuery  true if the query targets a super table
 * @return TSDB_CODE_SUCCESS, the error code of the failing step, or
 *         TSDB_CODE_QRY_OUT_OF_MEMORY if the fill column info cannot be allocated
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  // TODO fixme
  changeExecuteScanOrder(pQInfo, false);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the timestamp buffer forward when its order matches the query order
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    if (pColInfo == NULL) {
      // taosCreateFillColInfo() returns NULL only when its allocation fails
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    // align the start of the fill range to the query window, whatever the scan direction
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4485
/**
 * Clear the `complete` flag in the result info of every output function
 * context. Contexts without a result info are skipped.
 *
 * @param pRuntimeEnv  runtime environment holding the function contexts
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    col += 1;

    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512
/**
 * Prepare the execution environment for one data block of a table.
 *
 * Interval queries set the interval query range from the block's start key and,
 * if the query outputs tags or uses a timestamp buffer, the additional table
 * info. All other queries set the execution context for the table's group with
 * a key one step beyond the block's end key.
 *
 * @param pQInfo           query handle
 * @param pTableQueryInfo  table the block belongs to
 * @param pBlockInfo       description of the data block
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    bool needTableInfo = pRuntimeEnv->hasTagResults || (pRuntimeEnv->pTSBuf != NULL);
    if (needTableInfo) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4513
/*
 * Walk all data blocks delivered by the active tsdb query handle and apply the
 * query functions to each of them.
 *
 * The primary handle is used during the master scan, the secondary handle
 * otherwise. For every block the owning table is looked up by tid in the
 * table-query-info hash map; the scan stops at the first block whose table is
 * not registered there.
 *
 * The function does not return normally when the query is killed or when
 * terrno is set once the loop is over: it leaves through
 * longjmp(pRuntimeEnv->env, ...).
 *
 * @param pQInfo  query handle
 * @return        elapsed time of the scan (difference of two taosGetTimestampMs() readings)
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT handle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  } else {
    handle = pRuntimeEnv->pSecQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    STableQueryInfo **ppInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppInfo == NULL) {
      break;  // block belongs to a table that is not part of this query
    }

    STableQueryInfo *pInfo = *ppInfo;
    pQuery->current = pInfo;

    // sanity check: the per-table window must be well ordered for the scan
    // direction and must lie inside the query window
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert((pInfo->win.skey <= pInfo->win.ekey) && (pInfo->lastKey >= pInfo->win.skey) &&
             (pInfo->win.skey >= pQuery->window.skey && pInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert((pInfo->win.skey >= pInfo->win.ekey) && (pInfo->lastKey <= pInfo->win.skey) &&
             (pInfo->win.skey <= pQuery->window.skey && pInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pInfo, &blockInfo);
    }

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    int32_t      loadStatus = loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis, &pDataBlock);

    if (loadStatus == BLK_DATA_DISCARD) {
      // the block is dropped without being processed: move the table's last key past it
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  // an error was reported while the blocks were iterated
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4581 4582
/*
 * Prepare the runtime environment for querying a single table of group 0.
 *
 * The query status is reset to QUERY_NOT_COMPLETED, tag values are installed
 * when needed, and the primary tsdb query handle is replaced by a new one that
 * covers only the selected table, for the range [lastKey, win.ekey] of that
 * table. Leaves through longjmp(pRuntimeEnv->env, terrno) when the new handle
 * cannot be created.
 *
 * @param pQInfo  query handle
 * @param index   position of the table inside table group 0
 * @return        false when a ts buffer is in use, no cursor has been saved yet
 *                (cur.vgroupIndex == -1) and the buffer holds no data for the
 *                tag of this table; true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *pTableList = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableQInfo = taosArrayGetP(pTableList, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pTableQInfo->pTable, pQInfo->tsdb);
  }

  STableId *pId = TSDB_TABLEID(pTableQInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pId->uid, pId->tid, pTableQInfo->lastKey, pTableQInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pTableQInfo->lastKey, pTableQInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list holding one group with one table: the current one
  STableKeyInfo keyInfo = {.pTable = pTableQInfo->pTable, .lastKey = pTableQInfo->lastKey};

  SArray *pGroupList = taosArrayInit(1, POINTER_BYTES);
  SArray *pOneTable = taosArrayInit(1, sizeof(STableKeyInfo));
  taosArrayPush(pOneTable, &keyInfo);
  taosArrayPush(pGroupList, &pOneTable);

  STableGroupInfo groupInfo = {.numOfTables = 1, .pGroupList = pGroupList};

  // drop the handle left over from the previous table
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &groupInfo, pQInfo);

  // the temporary lists are released before the result is checked, so the error path does not leak them
  taosArrayDestroy(pOneTable);
  taosArrayDestroy(pGroupList);

  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pRuntimeEnv->pCtx[0].tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4651
static void sequentialTableProcess(SQInfo *pQInfo) {
4652
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4653
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4654
  setQueryStatus(pQuery, QUERY_COMPLETED);
4655

H
Haojun Liao 已提交
4656
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4657

H
Haojun Liao 已提交
4658
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4659 4660
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4661

4662
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4663
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4664

S
TD-1057  
Shengliang Guan 已提交
4665
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4666
             numOfGroups, group);
H
Haojun Liao 已提交
4667 4668 4669 4670 4671 4672 4673

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4674 4675
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4676 4677 4678
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4679

H
Haojun Liao 已提交
4680 4681 4682 4683 4684 4685 4686
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4687

4688
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4689
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4690
      } else {
H
Haojun Liao 已提交
4691
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4692
      }
B
Bomin Zhang 已提交
4693 4694 4695 4696 4697 4698

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4699

H
Haojun Liao 已提交
4700
      initCtxOutputBuf(pRuntimeEnv);
4701

4702
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4703
      assert(taosArrayGetSize(s) >= 1);
4704

4705
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4706 4707 4708
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4709

dengyihao's avatar
dengyihao 已提交
4710
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4711

H
Haojun Liao 已提交
4712
      // here we simply set the first table as current table
4713 4714 4715
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4716
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4717

H
Haojun Liao 已提交
4718 4719 4720 4721 4722
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4723

H
Haojun Liao 已提交
4724 4725 4726 4727 4728
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4729 4730 4731 4732 4733 4734

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4735
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4736
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4737
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4738

S
TD-1057  
Shengliang Guan 已提交
4739
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4740 4741 4742 4743 4744 4745 4746

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4747 4748
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4761
      // no need to update the lastkey for each table
4762
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4763

B
Bomin Zhang 已提交
4764 4765
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4766 4767 4768
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4769

4770
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4771 4772
      assert(taosArrayGetSize(s) >= 1);

4773
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4774 4775 4776 4777 4778 4779 4780 4781

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
4782
      taosArrayDestroy(s);
4783 4784 4785 4786 4787
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
4788
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
4789 4790 4791

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
4792
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes));
4793 4794 4795
        }
      }

4796
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4797 4798 4799 4800 4801 4802 4803
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4804
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4805 4806 4807 4808 4809 4810

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4811 4812 4813
    }
  } else {
    /*
4814
     * 1. super table projection query, 2. ts-comp query
4815 4816 4817
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4818
    if (pQInfo->groupIndex > 0) {
4819
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4820
      pQuery->rec.total += pQuery->rec.rows;
4821

4822
      if (pQuery->rec.rows > 0) {
4823 4824 4825
        return;
      }
    }
4826

4827
    // all data have returned already
4828
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4829 4830
      return;
    }
4831

4832 4833
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4834

H
Haojun Liao 已提交
4835
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4836 4837
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4838

4839
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4840
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4841
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4842
      }
4843

4844
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4845
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4846
        pQInfo->tableIndex++;
4847 4848
        continue;
      }
4849

H
hjxilinx 已提交
4850
      // TODO handle the limit offset problem
4851
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4852 4853
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4854 4855 4856
          continue;
        }
      }
4857

4858
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4859
      skipResults(pRuntimeEnv);
4860

4861
      // the limitation of output result is reached, set the query completed
4862
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
4863
        SET_STABLE_QUERY_OVER(pQInfo);
4864 4865
        break;
      }
4866

4867 4868
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4869

4870
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4871 4872 4873 4874 4875 4876
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4877
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4878

H
Haojun Liao 已提交
4879
        STableIdInfo tidInfo = {0};
4880

H
Haojun Liao 已提交
4881 4882 4883
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4884
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4885 4886
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4887
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
4888
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
4889 4890
          break;
        }
4891

4892
      } else {
4893
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4894 4895
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4896 4897
          continue;
        } else {
4898 4899 4900
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4901 4902 4903
        }
      }
    }
H
Haojun Liao 已提交
4904

4905
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4906 4907
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4908
  }
4909

4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4922
    finalizeQueryResult(pRuntimeEnv);
4923
  }
4924

4925 4926 4927
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4928

4929
  qDebug(
S
TD-1057  
Shengliang Guan 已提交
4930
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%" PRIzu ", %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4931
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4932
      pQuery->limit.offset);
4933 4934
}

4935 4936 4937 4938
/*
 * Switch the runtime environment from the master scan to the reverse scan.
 *
 * The scan flag is set to reverse, the start/end keys of the query window are
 * swapped, the query order is flipped (and copied to the ts buffer cursor when
 * a ts buffer exists), the function contexts are switched, and a secondary
 * tsdb query handle is opened over all table groups for the swapped window.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when the secondary handle
 * cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // the condition is built after the swap/flip above, so it describes the reverse scan
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer right away (as is done for the
  // primary handle elsewhere in this file) so that no stale handle is visible
  // while the helpers below run
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

4972 4973 4974 4975
/*
 * Return from the reverse scan to the master scan: swap the start/end keys of
 * the query window back, flip the order of the ts buffer cursor (if a ts buffer
 * exists), switch the function contexts and set the master scan flag.
 *
 * NOTE(review): pQuery->order.order is flipped by doSaveContext() but is not
 * flipped back here; presumably one of the helpers takes care of it -- confirm.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  STimeWindow      *pWin = &pEnv->pQuery->window;

  SWAP(pWin->skey, pWin->ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4986 4987 4988
/*
 * Close all time windows once a scan is finished.
 *
 * For an interval query every table of every group carries its own window
 * result info, so each of them is closed; for any other query only the window
 * result info of the runtime environment (the group results) is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;

  if (!QUERY_IS_INTERVAL_QUERY(pEnv->pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pEnv->windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(pTables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableQInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableQInfo->windowResInfo);
    }
  }
}

/*
 * Multi-table (super table) query driver.
 *
 * When pQInfo->groupIndex > 0 only the already computed results are copied
 * into the output buffer. Otherwise the master scan is executed, all time
 * windows are closed, an optional reverse scan follows, and finally the
 * results are merged/copied into the output buffer.
 *
 * After each scan phase the query is aborted through
 * longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED) if an error code is
 * recorded in pQInfo or the query has been killed.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  /*
   * groupIndex > 0: the scans have been done by a previous invocation, only
   * the remaining results need to be copied into the output buffer
   */
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // same check again, this time covering the reverse scan
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !isSumAvgRateQuery(pQuery)) {
    // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5085
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5086
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5087

H
hjxilinx 已提交
5088
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5089
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5090 5091
    return;
  }
5092

H
hjxilinx 已提交
5093
  pQuery->current = pTableInfo;  // set current query table info
5094

5095
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5096
  finalizeQueryResult(pRuntimeEnv);
5097

H
Haojun Liao 已提交
5098
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5099 5100
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5101
  }
5102

H
Haojun Liao 已提交
5103
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5104
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5105

5106
  skipResults(pRuntimeEnv);
5107
  limitResults(pRuntimeEnv);
5108 5109
}

H
hjxilinx 已提交
5110
/*
 * Single-table handler for queries that may produce many rows.
 *
 * The table is scanned repeatedly, starting at the table's current last key,
 * until at least one result row is available or the query is completed; rows
 * that fall into the offset are skipped in between. When the query completes,
 * the table id together with its last key is appended to
 * pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query handle
 * @param pTableInfo  the table to query; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);

  // an offset that is still positive without any filter column ends the query right here
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    // everything produced so far was skipped: start the next round with empty output buffers
    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    /*
     * Zero-initialize the record: the whole struct is handed to taosArrayPush,
     * so any byte not covered by the three assignments below would otherwise
     * be indeterminate. sequentialTableProcess() builds the same record with
     * `= {0}` as well.
     */
    STableIdInfo tidInfo = {0};
    STableId* id = TSDB_TABLEID(pQuery->current->pTable);

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  // the output of a ts_comp query is exempt from the capacity check
  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5170
/*
 * Inner loop of the single-table interval query: scan the table from `start`
 * and finalize the results, over and over, until the query status reports
 * QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * While an offset is pending and no fill is requested, closed time windows are
 * consumed by the offset when the query has filter columns or uses a ts buffer.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool offsetOnResults = (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (offsetOnResults && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      // maxOutput <= 0, means current query does not generate any results
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // drop as many closed windows as the remaining offset allows
      int32_t toDrop = (int32_t)(MIN(closedWindows, pQuery->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, toDrop);
      pQuery->limit.offset -= toDrop;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5197
// handle time interval query on table
H
hjxilinx 已提交
5198
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5199 5200
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5201 5202
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5203

H
Haojun Liao 已提交
5204
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5205
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5206

5207
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5208
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5209
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5210 5211 5212 5213
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5214
  while (1) {
H
Haojun Liao 已提交
5215
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5216

H
Haojun Liao 已提交
5217
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5218
      pQInfo->groupIndex = 0;  // always start from 0
5219
      pQuery->rec.rows = 0;
5220
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5221

5222
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5223
    }
5224

5225
    // the offset is handled at prepare stage if no interpolation involved
5226
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5227
      limitResults(pRuntimeEnv);
5228 5229
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5230
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5231
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5232
      numOfFilled = 0;
5233

H
Haojun Liao 已提交
5234
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5235
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5236
        limitResults(pRuntimeEnv);
5237 5238
        break;
      }
5239

5240
      // no result generated yet, continue retrieve data
5241
      pQuery->rec.rows = 0;
5242 5243
    }
  }
5244

5245
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5246
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5247
    pQInfo->groupIndex = 0;
5248
    pQuery->rec.rows = 0;
5249
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5250
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5251 5252 5253
  }
}

5254 5255 5256 5257
/*
 * Produce the next batch of results for a query that targets exactly one table
 * (asserted below).
 *
 * Results left over from a previous round are drained first:
 *   - with a fill policy, the pending gap-filled rows are generated and the call returns;
 *   - otherwise the buffered window results are copied to the output buffer and the call
 *     returns as soon as that yields at least one row.
 * When nothing is pending, the table is processed by the routine that matches the query
 * kind, and the time spent is added to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (queryHasRemainResForTableQuery(pEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      // rows are still owed because of result interpolation: keep filling here instead of
      // launching another retrieve round
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
      if (pQuery->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // no buffered window results remain
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this round
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableQueryInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window and interval queries all go through the interval processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pEnv->groupbyNormalCol) {
    tableIntervalProcess(pQInfo, pTableQueryInfo);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTableQueryInfo);
  } else {
    // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQueryInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5319
/*
 * Run one round of a query that is dispatched through the super-table path.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation
 * queries nor grouped by a normal column, go to multiTableQueryProcess(); everything
 * else goes to sequentialTableProcess(). The time spent is added to the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && (!pEnv->groupbyNormalCol));

  if (!useMultiTableProcess) {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5340
/*
 * Locate the source column referenced by an expression.
 *
 * @param pQueryMsg  query message; numOfTags, numOfCols and colList are read
 * @param pExprMsg   expression whose colInfo (flag, colId) is looked up
 * @param pTagCols   tag column descriptors, searched when the expression refers to a tag
 * @return  - TSDB_TBNAME_COLUMN_INDEX for a tag expression whose colId is that constant;
 *          - TSDB_UD_COLUMN_INDEX when colInfo.flag is TSDB_COL_UDC;
 *          - otherwise the position of the matching colId in pTagCols (tag expression)
 *            or in pQueryMsg->colList;
 *          - INT32_MIN when no column matches.
 *
 * A missing column still trips assert(0) in debug builds. The explicit return makes
 * builds with NDEBUG well defined: previously control fell off the end of this
 * non-void function. INT32_MIN is negative, so it is rejected by the `j < 0` /
 * `j >= 0` checks callers in this file apply to the result.
 * NOTE(review): INT32_MIN is assumed to differ from TSDB_TBNAME_COLUMN_INDEX and
 * TSDB_UD_COLUMN_INDEX, whose values are defined outside this section -- confirm.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else if (pExprMsg->colInfo.flag == TSDB_COL_UDC) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);
  return INT32_MIN;  // not found: keep release builds from returning an indeterminate value
}

5370 5371 5372
/*
 * Check that the column referenced by pExprMsg resolves to an index that is below
 * the number of source columns or below the number of tags of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  const int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5375
/*
 * Sanity-check the scalar header fields of a query message.
 *
 * Rejects (and logs) a negative interval, a non-positive table count, a negative
 * number of group-by columns, and an output column count outside [1, TSDB_MAX_COLUMNS].
 *
 * @return true when every checked field is acceptable, false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
    return false;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  bool outputInRange = (pQueryMsg->numOfOutput > 0) && (pQueryMsg->numOfOutput <= TSDB_MAX_COLUMNS);
  if (!outputInRange) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  }

  return outputInRange;
}

/*
 * Validate the number of source columns/tags carried by the query message.
 *
 * Negative counts, or a total above TSDB_MAX_COLUMNS, are rejected and logged.
 * When the message carries neither columns nor tags, every output expression must be
 * TSDB_FUNC_TAGPRJ, or TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT applied to
 * TSDB_TBNAME_COLUMN_INDEX; any other expression makes the message invalid.
 *
 * @param pQueryMsg  query message whose counters are checked
 * @param pExprMsg   array of numOfOutput expression descriptors
 * @return true when the message is acceptable
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only expressions that need none are allowed
  for (int32_t k = 0; k < pQueryMsg->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = pExprMsg[k];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool needsNoSourceCol = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                            (pFunc->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                            (pFunc->functionId == TSDB_FUNC_COUNT && onTbname);
    if (!needsNoSourceCol) {
      return false;
    }
  }

  return true;
}

5421
/*
 * Decode the table id entries stored at pMsg and collect them in a new array.
 *
 * Each of the pQueryMsg->numOfTables entries is byte-swapped in place inside the
 * message buffer (tid with htonl, uid and key with htobe64) and then pushed onto
 * *pTableIdList.
 *
 * @param pQueryMsg     query message; numOfTables must be > 0 (asserted)
 * @param pMsg          position of the first STableIdInfo entry in the message
 * @param pTableIdList  receives the array created with taosArrayInit()
 * @return position in the message right after the last entry
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t n = 0; n < pQueryMsg->numOfTables; ++n, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5439

5440
/**
H
hjxilinx 已提交
5441
 * pQueryMsg->head has been converted before this function is called.
5442
 *
H
hjxilinx 已提交
5443
 * @param pQueryMsg
5444 5445 5446 5447
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5448
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5449
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5450 5451
  int32_t code = TSDB_CODE_SUCCESS;

5452 5453 5454 5455 5456 5457 5458 5459
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5460

5461 5462
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5463
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5464
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5465 5466

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5467
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5468
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5469 5470 5471
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5472
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5473
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5474
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5475

5476
  // query msg safety check
5477
  if (!validateQueryMsg(pQueryMsg)) {
5478 5479
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5480 5481
  }

H
hjxilinx 已提交
5482 5483
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5484 5485
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5486
    pColInfo->colId = htons(pColInfo->colId);
5487
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5488 5489
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5490

H
hjxilinx 已提交
5491
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5492

H
hjxilinx 已提交
5493
    int32_t numOfFilters = pColInfo->numOfFilters;
5494
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5495
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5496 5497 5498 5499
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5500 5501 5502
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5503
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5504

5505 5506
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5507 5508 5509

      pMsg += sizeof(SColumnFilterInfo);

5510 5511
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5512

5513
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5514 5515 5516 5517 5518
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5519
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5520
        pMsg += (pColFilter->len + 1);
5521
      } else {
5522 5523
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5524 5525
      }

5526 5527
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5528 5529 5530
    }
  }

5531
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5532 5533 5534 5535 5536
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5537
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5538

5539
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5540
    (*pExpr)[i] = pExprMsg;
5541

5542
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5543 5544 5545 5546
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5547

5548
    pMsg += sizeof(SSqlFuncMsg);
5549 5550

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5551
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5552 5553 5554 5555
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5556
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5557 5558 5559 5560 5561
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5562 5563
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5564
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5565 5566
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5567 5568
      }
    } else {
5569
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5570
//        return TSDB_CODE_QRY_INVALID_MSG;
5571
//      }
5572 5573
    }

5574
    pExprMsg = (SSqlFuncMsg *)pMsg;
5575
  }
5576

5577
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5578
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5579
    goto _cleanup;
5580
  }
5581

H
hjxilinx 已提交
5582
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5583

H
hjxilinx 已提交
5584
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5585
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5586 5587 5588 5589
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5590 5591 5592

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5593
      pMsg += sizeof((*groupbyCols)[i].colId);
5594 5595

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5596 5597
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5598
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5599 5600 5601 5602 5603
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5604

H
hjxilinx 已提交
5605 5606
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5607 5608
  }

5609 5610
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5611
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5612 5613

    int64_t *v = (int64_t *)pMsg;
5614
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5615 5616
      v[i] = htobe64(v[i]);
    }
5617

5618
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5619
  }
5620

5621 5622
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5623 5624 5625 5626 5627
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5628 5629
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5630

5631 5632 5633 5634
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5635

5636
      (*tagCols)[i] = *pTagCol;
5637
      pMsg += sizeof(SColumnInfo);
5638
    }
H
hjxilinx 已提交
5639
  }
5640

5641 5642 5643
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5644 5645 5646 5647 5648 5649

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5650 5651 5652
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5653

weixin_48148422's avatar
weixin_48148422 已提交
5654
  if (*pMsg != 0) {
5655
    size_t len = strlen(pMsg) + 1;
5656

5657
    *tbnameCond = malloc(len);
5658 5659 5660 5661 5662
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5663
    strcpy(*tbnameCond, pMsg);
5664
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5665
  }
5666

5667
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5668 5669
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5670
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5671
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5672 5673

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5674 5675

_cleanup:
S
Shengliang Guan 已提交
5676
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5677 5678
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5679 5680 5681 5682
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5683 5684

  return code;
5685 5686
}

H
hjxilinx 已提交
5687
/*
 * Build the expression tree of an arithmetic expression from its serialized form.
 *
 * The serialized expression is taken from the first argument of pArithExprInfo->base
 * and handed to exprTreeFromBinary() inside a TRY block.
 *
 * @param pArithExprInfo  expression info; on success pExpr is set to the created tree
 * @param pQueryMsg       only used to tag log output
 * @return TSDB_CODE_SUCCESS, the code caught by the CATCH block, or
 *         TSDB_CODE_QRY_APP_ERROR when no tree was produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5708
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5709 5710
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5711
  int32_t code = TSDB_CODE_SUCCESS;
5712

H
Haojun Liao 已提交
5713
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5714
  if (pExprs == NULL) {
5715
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5716 5717 5718 5719 5720
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5721
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5722
    pExprs[i].base = *pExprMsg[i];
5723
    pExprs[i].bytes = 0;
5724 5725 5726 5727

    int16_t type = 0;
    int16_t bytes = 0;

5728
    // parse the arithmetic expression
5729
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5730
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5731

5732
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5733
        taosTFree(pExprs);
5734
        return code;
5735 5736
      }

5737
      type  = TSDB_DATA_TYPE_DOUBLE;
5738
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5739
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5740
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5741
      type = s.type;
H
Haojun Liao 已提交
5742
      bytes = s.bytes;
5743 5744
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5745 5746
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5747 5748
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5749 5750 5751 5752 5753

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5754
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5755
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5756

dengyihao's avatar
dengyihao 已提交
5757
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5758 5759 5760 5761
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5762
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5763

H
Haojun Liao 已提交
5764 5765 5766
        type  = s.type;
        bytes = s.bytes;
      }
5767 5768
    }

S
TD-1057  
Shengliang Guan 已提交
5769
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
5770
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5771
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5772
      taosTFree(pExprs);
5773
      return TSDB_CODE_QRY_INVALID_MSG;
5774 5775
    }

5776
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5777
      tagLen += pExprs[i].bytes;
5778
    }
5779
    assert(isValidDataType(pExprs[i].type));
5780 5781 5782
  }

  // TODO refactor
5783
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5784 5785
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5786

5787
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5788
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5789 5790 5791 5792 5793 5794 5795 5796 5797
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
5798 5799 5800
    }
  }

5801
  *pExprInfo = pExprs;
5802 5803 5804
  return TSDB_CODE_SUCCESS;
}

5805
/*
 * Create the group-by descriptor for a query.
 *
 * @param pQueryMsg  decoded query message (numOfGroupCols, orderType, orderByIdx are read)
 * @param pColIndex  numOfGroupCols group-by column descriptors; they are copied into the
 *                   descriptor's columnInfo array
 * @param code       set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails; left
 *                   untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by columns or an
 *         allocation failed (the two cases are told apart through *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the allocation above
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5829
/*
 * Build pQuery->pFilterInfo from the filter conditions attached to pQuery->colList.
 *
 * Every column with numOfFilters > 0 gets one SSingleColumnFilterInfo holding a copy of
 * the column descriptor and one SColumnFilterElem per condition. For each condition the
 * filter callback is chosen from the per-type tables: a bounded range (lower is > or >=
 * and upper is < or <=) uses the range table, anything else uses the value table indexed
 * by the single relational operator.
 *
 * @param pQInfo  only used to tag log output
 * @param pQuery  query whose numOfFilterCols is incremented and pFilterInfo is populated
 * @return TSDB_CODE_SUCCESS (also when no column has a filter),
 *         TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG
 *
 * On an error return the buffers allocated so far stay attached to pQuery; this
 * function does not free them.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      // a single struct assignment copies the descriptor (the extra memcpy was redundant)
      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          assert(rangeFilterArray != NULL);

          // range table slots: 1 = (>, <), 2 = (>=, <), 3 = (>, <=), 4 = (>=, <=)
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            // a lower operator outside the range form must not come with an upper operator
            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5922
/*
 * Refresh colInfo.colIndex of every select expression so that it refers to the
 * position of the column inside this query's own column lists.
 *
 *  - TSDB_FUNC_ARITHM expressions are skipped;
 *  - normal columns are looked up by colId in pQuery->colList (must be found);
 *  - columns with colId <= TSDB_UD_COLUMN_INDEX (user-defined constants) are left alone;
 *  - everything else is looked up in pQuery->tagColList; a miss is tolerated only for
 *    TSDB_TBNAME_COLUMN_INDEX.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before reading through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

5959
/*
 * Three-way comparison of two STableIdInfo entries by uid (ascending); has the
 * (const void*, const void*) -> int shape used by qsort-style comparators.
 *
 * @return 1 when a's uid is greater, -1 when it is smaller, 0 when both are equal
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo *lhs = (const STableIdInfo *)a;
  const STableIdInfo *rhs = (const STableIdInfo *)b;

  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

dengyihao's avatar
dengyihao 已提交
5967 5968
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
5969 5970 5971
/*
 * Choose the capacity of the result buffer (in rows) and the row threshold derived
 * from it (85% of the capacity).
 *
 * Projection queries get as many rows as fit in RESULT_MSG_MIN_SIZE bytes, but never
 * fewer than RESULT_MSG_MIN_ROWS; all other queries use a fixed, smaller buffer of
 * 4096 rows.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  int32_t numOfRows = 4096;  // non-projection queries use a smaller output buffer

  if (isProjQuery(pQuery)) {
    numOfRows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    numOfRows = (numOfRows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : numOfRows;
  }

  pQuery->rec.capacity  = numOfRows;
  pQuery->rec.threshold = (int32_t)(numOfRows * RESULT_THRESHOLD_RATIO);
}

weixin_48148422's avatar
weixin_48148422 已提交
5988
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5989
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
B
Bomin Zhang 已提交
5990 5991 5992
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

5993 5994
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
B
Bomin Zhang 已提交
5995
    goto _cleanup_qinfo;
5996
  }
5997

B
Bomin Zhang 已提交
5998 5999 6000
  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;
6001 6002

  SQuery *pQuery = calloc(1, sizeof(SQuery));
B
Bomin Zhang 已提交
6003 6004 6005
  if (pQuery == NULL) {
    goto _cleanup_query;
  }
H
Haojun Liao 已提交
6006

6007 6008
  pQInfo->runtimeEnv.pQuery = pQuery;

6009
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
6010
  pQuery->numOfOutput     = numOfOutput;
6011 6012 6013
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
6014
  pQuery->order.orderColId = pQueryMsg->orderColId;
6015 6016 6017 6018
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
6019
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
6020
  pQuery->fillType        = pQueryMsg->fillType;
6021
  pQuery->numOfTags       = pQueryMsg->numOfTags;
B
Bomin Zhang 已提交
6022
  pQuery->tagColList      = pTagCols;
H
Haojun Liao 已提交
6023

6024
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
6025
  if (pQuery->colList == NULL) {
6026
    goto _cleanup;
6027
  }
6028

H
hjxilinx 已提交
6029
  for (int16_t i = 0; i < numOfCols; ++i) {
6030
    pQuery->colList[i] = pQueryMsg->colList[i];
6031
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
6032
  }
6033

6034
  // calculate the result row size
6035 6036 6037
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
6038
  }
6039

6040
  doUpdateExprColumnIndex(pQuery);
6041

6042
  int32_t ret = createFilterInfo(pQInfo, pQuery);
6043
  if (ret != TSDB_CODE_SUCCESS) {
6044
    goto _cleanup;
6045 6046 6047
  }

  // prepare the result buffer
6048
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
6049
  if (pQuery->sdata == NULL) {
6050
    goto _cleanup;
6051 6052
  }

H
Haojun Liao 已提交
6053
  calResultBufSize(pQuery);
6054

6055
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
6056
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
6057 6058

    // allocate additional memory for interResults that are usually larger then final results
6059
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
6060
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
6061
    if (pQuery->sdata[col] == NULL) {
6062
      goto _cleanup;
6063 6064 6065
    }
  }

6066
  if (pQuery->fillType != TSDB_FILL_NONE) {
6067 6068
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
6069
      goto _cleanup;
6070 6071 6072
    }

    // the first column is the timestamp
6073
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
6074 6075
  }

dengyihao's avatar
dengyihao 已提交
6076 6077 6078 6079 6080 6081
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
6082
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
H
Haojun Liao 已提交
6083
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
H
Haojun Liao 已提交
6084
  }
6085

weixin_48148422's avatar
weixin_48148422 已提交
6086 6087
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
6088
  taosArraySort(pTableIdList, compareTableIdInfo);
6089

H
Haojun Liao 已提交
6090
  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
H
Haojun Liao 已提交
6091
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
H
Haojun Liao 已提交
6092 6093 6094 6095
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

H
Haojun Liao 已提交
6096 6097
  int32_t index = 0;

H
hjxilinx 已提交
6098
  for(int32_t i = 0; i < numOfGroups; ++i) {
6099
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
6100

H
Haojun Liao 已提交
6101
    size_t s = taosArrayGetSize(pa);
6102
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
B
Bomin Zhang 已提交
6103 6104 6105
    if (p1 == NULL) {
      goto _cleanup;
    }
H
Haojun Liao 已提交
6106

Y
yihaoDeng 已提交
6107
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
6108

H
hjxilinx 已提交
6109
    for(int32_t j = 0; j < s; ++j) {
H
Haojun Liao 已提交
6110
      STableKeyInfo* info = taosArrayGet(pa, j);
6111

H
Haojun Liao 已提交
6112
      STableId* id = TSDB_TABLEID(info->pTable);
H
Haojun Liao 已提交
6113
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
6114

H
Haojun Liao 已提交
6115
      window.skey = (pTableId != NULL)? pTableId->key:pQueryMsg->window.skey;
S
TD-1057  
Shengliang Guan 已提交
6116
      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);
H
Haojun Liao 已提交
6117

H
Haojun Liao 已提交
6118
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
B
Bomin Zhang 已提交
6119 6120 6121
      if (item == NULL) {
        goto _cleanup;
      }
H
Haojun Liao 已提交
6122

6123
      item->groupIndex = i;
H
hjxilinx 已提交
6124
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
6125 6126
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
6127 6128
    }
  }
6129

weixin_48148422's avatar
weixin_48148422 已提交
6130
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
6131 6132
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pthread_mutex_init(&pQInfo->lock, NULL);
weixin_48148422's avatar
weixin_48148422 已提交
6133

6134
  pQuery->pos = -1;
6135
  pQuery->window = pQueryMsg->window;
6136
  colIdCheck(pQuery);
6137

6138
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
6139 6140
  return pQInfo;

B
Bomin Zhang 已提交
6141
_cleanup_qinfo:
H
Haojun Liao 已提交
6142
  tsdbDestroyTableGroup(pTableGroupInfo);
B
Bomin Zhang 已提交
6143 6144

_cleanup_query:
6145 6146 6147 6148
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
H
Haojun Liao 已提交
6149

S
Shengliang Guan 已提交
6150
  taosTFree(pTagCols);
B
Bomin Zhang 已提交
6151 6152 6153 6154 6155 6156
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }
H
Haojun Liao 已提交
6157

S
Shengliang Guan 已提交
6158
  taosTFree(pExprs);
B
Bomin Zhang 已提交
6159

6160
_cleanup:
dengyihao's avatar
dengyihao 已提交
6161
  freeQInfo(pQInfo);
6162 6163 6164
  return NULL;
}

H
hjxilinx 已提交
6165
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6166 6167 6168 6169
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6170

H
hjxilinx 已提交
6171 6172 6173 6174
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6175
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6176 6177 6178
  return (sig == (uint64_t)pQInfo);
}

6179
/*
 * Finish the setup of a freshly created SQInfo so that it can be executed.
 *
 * The query is marked QUERY_COMPLETED and TSDB_CODE_SUCCESS is returned right away when the
 * time window is empty for the scan order, or when no table is attached to the query.
 * Otherwise the optional timestamp buffer carried by the message is materialized and the
 * remaining setup is delegated to doInitQInfo().
 *
 * When doInitQInfo() fails, pQInfo is released with freeQInfo() before the error code is
 * returned, so the caller must not touch pQInfo after a failure.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // Build the timestamp buffer only now that the query is certain to run: the early returns
  // above never hand it over to anybody, so creating it before them would leak it.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  int32_t code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decreased during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

B
Bomin Zhang 已提交
6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232
/*
 * Release an array of numOfFilters column filters. For every entry flagged with filterstr
 * the buffer referenced through pz is released first, then the array itself.
 * A NULL array is accepted and ignored.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL) {
    for (int32_t k = 0; k < numOfFilters; ++k) {
      SColumnFilterInfo* entry = &pFilter[k];
      if (entry->filterstr) {
        free((void*)(entry->pz));
      }
    }

    free(pFilter);
  }
}

H
hjxilinx 已提交
6233 6234 6235 6236
/*
 * Tear down a query handle: the runtime environment, the SQuery with everything attached to
 * it (result pages, fill values, filters, select/group-by expressions, tag and column
 * descriptions), the per-table query info, the table groups and finally the SQInfo itself.
 *
 * Handles that fail isValidQInfo() (NULL, or signature mismatch) are ignored.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    // output pages, one per output column
    if (pQuery->sdata != NULL) {
      int32_t c = 0;
      while (c < pQuery->numOfOutput) {
        taosTFree(pQuery->sdata[c]);
        c += 1;
      }

      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // filter arrays attached to each filtered column
    for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
      if (pQuery->pFilterInfo[f].numOfFilters > 0) {
        taosTFree(pQuery->pFilterInfo[f].pFilters);
      }
    }

    // output expressions and the expression trees hanging off them
    if (pQuery->pSelectExpr != NULL) {
      for (int32_t e = 0; e < pQuery->numOfOutput; ++e) {
        if (pQuery->pSelectExpr[e].pExpr != NULL) {
          tExprTreeDestroy(&pQuery->pSelectExpr[e].pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    // column descriptions together with their cloned filters
    if (pQuery->colList != NULL) {
      for (int32_t c = 0; c < pQuery->numOfCols; c++) {
        freeColumnFilterInfo(pQuery->colList[c].filters, pQuery->colList[c].numOfFilters);
      }

      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  // todo refactor, extract method to destroytableDataInfo
  if (pQInfo->tableqinfoGroupInfo.pGroupList != NULL) {
    int32_t groupCount = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *group = GET_TABLEGROUP(pQInfo, g);
      size_t  tableCount = taosArrayGetSize(group);

      for (int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo* tableInfo = taosArrayGetP(group, t);
        destroyTableQueryInfo(tableInfo);
      }

      taosArrayDestroy(group);
    }
  }

  taosTFree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // clear the signature so the handle no longer passes isValidQInfo()
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6323
/*
 * Compute the number of payload bytes for the current batch of results.
 *
 * For an ordinary query this is rowSize * (*numOfRows). For a ts-comp query with at least
 * one row, the result is a file whose path is kept in the first output page: its size is
 * returned and also written back to *numOfRows. 0 is returned when that file cannot be
 * stat'ed.
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
6344

H
hjxilinx 已提交
6345 6346 6347
/*
 * Copy the current batch of results into the response buffer `data` and update the
 * running totals of the query.
 *
 * For a ts-comp query the result lives in a temporary file whose path is stored in the
 * first output page; the file content is read into `data` and the file is removed. For any
 * other query the rows are copied by doCopyQueryResultToMsg().
 *
 * The query is switched to QUERY_OVER once a ts-comp query has completed, or once the total
 * number of returned rows equals a positive limit. Always returns TSDB_CODE_SUCCESS.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the results of lseek()/read() signed, otherwise their -1 error value turns into a huge size
      int64_t s = lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, s);
      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t sz = read(fd, data, (uint32_t)s);
        if (sz < s) {  // todo handle error; covers both a failed read (-1) and a short read
          qError("QInfo:%p failed to read ts-comp data, path:%s, expected:%"PRId64", read:%"PRId64", reason:%s", pQInfo,
                 pQuery->sdata[0]->data, s, sz, (sz < 0)? strerror(errno):"short read");
          assert(0);
        }
      } else {
        qError("QInfo:%p failed to seek in ts-comp data file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6397 6398 6399 6400 6401 6402 6403
// Management state for the pool of query handles.
// NOTE(review): the roles of vgId, closed and lock are not visible in this part of the file;
// the remarks on them are assumptions to be confirmed against the code that uses this struct.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vnode this manager serves -- TODO confirm
  bool            closed;         // presumably set once the manager stops accepting work -- TODO confirm
  pthread_mutex_t lock;           // presumably serializes access to this struct -- TODO confirm
} SQueryMgmt;

6404
/*
 * Create a query handle from a query message.
 *
 * The message is decoded, the output and group-by expressions are built, the tables to
 * query are resolved (a single table, a super table filtered by tag condition, or an
 * explicit id list) and the handle is created and initialized.
 *
 * tsdb       storage handle passed through to the table lookup and query setup
 * vgId       vnode id passed through to the query setup
 * pQueryMsg  the query message; the column filters attached to it are released before returning
 * pQInfo     [out] the new handle on success, NULL on any failure
 *
 * Returns TSDB_CODE_SUCCESS or the error code of the step that failed.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    // never report success without a handle: a missing group-by expression is a failure by itself
    if (code == TSDB_CODE_SUCCESS) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    // the query type comes from the message: reject it even when assertions are compiled out
    qError("qmsg:%p unsupported query type:%d", pQueryMsg, (int32_t)pQueryMsg->queryType);
    assert(0);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  // createQInfoImpl takes over the expressions, the tag columns and the table group whether it
  // succeeds or not, so drop the local references to keep the cleanup below from freeing them again
  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // still owned here only when an error occurred before the hand-over; release the expression
  // trees as well, not just the array that holds them
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }
    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // the handle has already been freed in initQInfo on failure, but *pQInfo may still point to it
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6530
/*
 * Log the cost summary of a finished query and release its handle.
 * Handles that fail isValidQInfo() are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);

    // print the query cost summary
    queryCostStatis(pQInfo);
    freeQInfo(pQInfo);
  }
}

6541 6542 6543 6544 6545 6546 6547 6548 6549 6550
/*
 * Mark the result of the query as ready and give up ownership of the handle.
 *
 * Returns true when a response context is registered on the handle at the moment the
 * result becomes ready, false otherwise.
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  // the ready flag and the response context are read/written together under the handle lock
  pthread_mutex_lock(&pQInfo->lock);
  pQInfo->dataReady = QUERY_RESULT_READY;
  const bool hasRspContext = (pQInfo->rspContext != NULL);
  pthread_mutex_unlock(&pQInfo->lock);

  // clear qhandle owner; only the thread that owns the handle may get here
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  return hasRspContext;
}

6558
/*
 * Execute (or resume) the query behind the handle on the calling thread.
 *
 * The calling thread first claims the handle; when another thread already owns it, the code
 * of the handle is set to TSDB_CODE_QRY_IN_EXEC and false is returned. In every other case
 * the return value is the one of doBuildResCheck(), which also releases the ownership.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  // claim the handle: the owner field changes from 0 to this thread id only when nobody owns it
  int64_t self = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client;
  // a non-zero value means the execution below jumped back here with an error code
  int32_t ret = setjmp(pQInfo->runtimeEnv.env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // pick the executor that matches the kind of query
  if (onlyQueryTags(pQInfo->runtimeEnv.pQuery)) {
    assert(pQInfo->runtimeEnv.pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pQInfo->runtimeEnv.stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows != 0) {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  } else {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  }

  return doBuildResCheck(pQInfo);
}

6614
/*
 * Tell the caller whether the result of the query can be built right now.
 *
 * *buildRes is set to true when the result is ready. Otherwise it is set to false and
 * pRspContext is stored on the handle for later use.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the code currently
 * stored on the handle.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo() rejects NULL as well
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);

  const bool ready = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = ready;

  if (!ready) {
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
  } else {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  }

  // read the code while still holding the lock
  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
6644

6645
/*
 * Build the retrieve response for the current batch of results.
 *
 * pRsp          [out] response allocated with rpcMallocCont()
 * contLen       [out] length of the response in bytes
 * continueExec  [out] false when the query is killed or over, true when more results remain
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_QRY_OUT_OF_MEMORY
 * when the response cannot be allocated, otherwise the code stored on the handle.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo() rejects NULL as well
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // payload = result rows + an int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t payload = getResultSize(pQInfo, &pQuery->rec.rows);
  payload += sizeof(int32_t);
  payload += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payload + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *rsp = *pRsp;
  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a query without error
  rsp->offset = 0;
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    rsp->offset = htobe64(pQuery->limit.offset);
  }
  rsp->useconds = htobe64(pQInfo->runtimeEnv.summary.elapsedTime);

  rsp->precision = htons(pQuery->precision);

  const bool hasRows = (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS);
  if (hasRows) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  const bool finished = IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
  *continueExec = !finished;

  if (finished) {
    rsp->completed = 1;  // notify no more result to client
  } else {
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6698

6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709
/*
 * Report whether the query behind the handle has finished.
 *
 * Returns 1 when the query is killed or its status carries QUERY_OVER, 0 when it is still
 * in progress, and TSDB_CODE_QRY_INVALID_QHANDLE when the handle is invalid.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo() rejects NULL as well
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQInfo->runtimeEnv.pQuery->status, QUERY_OVER) ? 1 : 0;
}

H
Haojun Liao 已提交
6710
/*
 * Flag the query as killed and block until no thread owns the handle any more.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_SUCCESS otherwise.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo() rejects NULL as well
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread to stop.
  // Once the query is stopped, the owner of qHandle will be cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);  // poll the owner field every 100 ms
  }

  return TSDB_CODE_SUCCESS;
}

6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743
/*
 * Write one tag value into the result buffer.
 *
 * output  destination inside the result buffer
 * val     source value, NULL to store a null value of the given type
 * type    data type of the tag; BINARY and NCHAR are copied with their full variable-length size
 * bytes   size used for every other type
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
6744 6745 6746
/**
 * Fill the query result buffers for a tag-only query and mark the query
 * QUERY_COMPLETED.
 *
 * Three kinds of query are handled, selected by the function id of the first
 * select expression:
 *   - TSDB_FUNC_TID_TAG: one var-data row per table holding
 *     (int16 0, uid, tid, vgId) followed by the table name or tag value;
 *   - TSDB_FUNC_COUNT:   a single row holding the number of tables;
 *   - anything else:     one row per table with the table name / tag value
 *     for every output column, honouring limit/offset.
 *
 * Tables are consumed starting at pQInfo->tableIndex, which is advanced for
 * every table visited. On exit pQuery->rec.rows holds the number of rows
 * produced. Nothing is done (and the status is left untouched) when there is
 * no table group.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;

    // default to the expression's own type/width, then prefer the definition
    // found in the tag column list when the column id matches
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      // each row is a var-data value of rsize bytes in total
      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The fields below land at arbitrary offsets inside a char buffer, so
      // they are serialized with memcpy instead of being stored through
      // casted pointers, which could perform misaligned accesses.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // produce at most rec.capacity rows, or fewer when a smaller non-negative
    // limit was requested
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (pExprInfo[j].base.colInfo.flag == TSDB_COL_UDC) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

6877 6878 6879 6880 6881 6882 6883
/**
 * Return the response context currently stored on a query handle.
 *
 * @param qinfo  opaque query handle; must not be NULL (asserted)
 * @return the handle's rspContext field as-is
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  const SQInfo* pQInfo = (const SQInfo*) qinfo;
  assert(pQInfo != NULL);

  void* rspContext = pQInfo->rspContext;
  return rspContext;
}

6884 6885 6886 6887 6888 6889 6890
/**
 * Release callback for entries of the query-handle cache (registered with
 * taosCacheInit in qOpenQueryMgmt).
 *
 * @param qhandle  pointer to the stored handle (a void**); the call is a
 *                 no-op when either it or the handle it points to is NULL.
 *
 * The query is killed first, which waits until it is no longer owned, and
 * only then destroyed.
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle == NULL) {
    return;
  }

  if (*ppHandle == NULL) {
    return;
  }

  qKillQuery(*ppHandle);
  qDestroyQueryInfo(*ppHandle);
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
6895
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
6896 6897 6898 6899

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

6900
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
6901 6902 6903 6904
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
6905

6906 6907 6908 6909 6910
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
6911 6912

  qDebug("vgId:%d, open querymgmt success", vgId);
6913
  return pQueryMgmt;
6914 6915
}

H
Haojun Liao 已提交
6916
/**
 * Per-entry callback handed to taosCacheRefresh by qQueryMgmtNotifyClosed:
 * kills the query whose handle is stored in the cache entry.
 *
 * @param handle  pointer to the stored handle (a void**)
 */
static void queryMgmtKillQueryFn(void* handle) {
  void* pQInfo = *(void**)handle;
  qKillQuery(pQInfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
6922 6923 6924 6925 6926 6927 6928
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
6929
//  pthread_mutex_lock(&pQueryMgmt->lock);
6930
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
6931
//  pthread_mutex_unlock(&pQueryMgmt->lock);
6932

H
Haojun Liao 已提交
6933
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950
}

/**
 * Tear down a query manager: clean up its handle cache, destroy its mutex
 * and free the manager itself.
 *
 * @param pQMgmt  manager returned by qOpenQueryMgmt; NULL is ignored.
 *                The manager must already have been marked closed
 *                (asserted), see qQueryMgmtNotifyClosed.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  assert(pQueryMgmt->closed);

  // remember the id now; the manager is freed before the final log line
  const int32_t vgId = pQueryMgmt->vgId;

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  taosTFree(pQueryMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

6956
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6957 6958 6959 6960
  if (pMgmt == NULL) {
    return NULL;
  }

6961
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
6962

6963 6964
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
6965
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
6966 6967 6968
    return NULL;
  }

H
Haojun Liao 已提交
6969
//  pthread_mutex_lock(&pQueryMgmt->lock);
6970
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
6971
//    pthread_mutex_unlock(&pQueryMgmt->lock);
6972
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
6973 6974
    return NULL;
  } else {
6975 6976 6977
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
6978
//    pthread_mutex_unlock(&pQueryMgmt->lock);
6979 6980 6981 6982 6983

    return handle;
  }
}

6984
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6985 6986 6987 6988 6989 6990
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6991
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6992 6993 6994 6995 6996 6997 6998
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
6999
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7000 7001 7002 7003 7004
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7005
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7006 7007 7008
  return 0;
}

7009