qExecutor.c 233.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
// upper bound used for the number of rows kept in one result-buffer page (4095)
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)

/**
 * Check if the primary column is loaded by default; otherwise, the program will
 * be forced to load the primary column explicitly.
 */
// true when any bit of status mask `s` is set in `p` (a bit-overlap test, not strict equality)
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
// column flag classification helpers
#define TSDB_COL_IS_TAG(f)    (((f)&TSDB_COL_TAG) != 0)
#define TSDB_COL_IS_NORMAL_COL(f)    ((f) == TSDB_COL_NORMAL)
#define TSDB_COL_IS_UD_COL(f)   ((f) == TSDB_COL_UDC)

// true when the query order maps to the ascending forward step
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

// accessors for the scan flag kept in the runtime environment
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

// recover the enclosing SQInfo from a pointer to its embedded runtimeEnv member
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

// position of the index-th element counted from (query)->pos in steps of `step`
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
// flip `n` between TSDB_ORDER_ASC and TSDB_ORDER_DESC in place (n must be an lvalue; it is evaluated twice)
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

// zero initializer for SDataBlockInfo
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
56 57 58 59 60
/*
 * Copy the skey/ekey boundaries of time window `_src` into `_dst`.
 * Both arguments are objects (not pointers) that expose `skey` and `ekey`.
 * The expansion deliberately has no trailing semicolon so the macro behaves
 * like a single statement, e.g. inside an un-braced if/else; arguments are
 * parenthesized so expressions such as `*p` expand correctly.
 */
#define TIME_WINDOW_COPY(_dst, _src) \
  do {                               \
    (_dst).skey = (_src).skey;       \
    (_dst).ekey = (_src).ekey;       \
  } while (0)

61
/* Query status bits; each value is a distinct bit so they can be combined with `|`. */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* The query is over:
   * 1. used in one-row-result queries, e.g., count/sum/first/last/avg, etc.
   * 2. also set when all data within the queried time window has been processed.
   */
  QUERY_COMPLETED = 0x4u,

  /* Set when the result has not been completely returned to the client; usually
   * used for interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
81 82

/* Outcome codes for comparing timestamps/tags in a ts-join (names taken at face value). */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

88
typedef struct {
89 90 91 92 93 94
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
95 96
} SQueryStatusInfo;

H
Haojun Liao 已提交
97
#if 0
/*
 * Fault-injection replacements for malloc/calloc/realloc (compiled out by default).
 * When enabled, the allocators randomly return NULL so that out-of-memory
 * handling paths in this file can be exercised.
 */

// fails when rand() is a multiple of 1000, otherwise forwards to malloc
static UNUSED_FUNC void *u_malloc(size_t size) {
  uint32_t v = rand();

  if (v % 1000 == 0) {
    return NULL;
  } else {
    return malloc(size);
  }
}

// fails when rand() is a multiple of 1000, otherwise forwards to calloc
static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t v = rand();
  if (v % 1000 == 0) {
    return NULL;
  } else {
    return calloc(num, size);
  }
}

// fails when rand() % 5 is 0 or 1, otherwise forwards to realloc
static UNUSED_FUNC void* u_realloc(void* p, size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return realloc(p, size);
  }
}

// redirect the allocators used below; must stay after the wrapper definitions
#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
130

131
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
132 133 134
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

135
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
136
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
137

H
Haojun Liao 已提交
138
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
139

H
Haojun Liao 已提交
140 141 142 143 144 145 146 147
// previous time window may not be of the same size of pQuery->intervalTime
#define GET_NEXT_TIMEWINDOW(_q, tw)                                   \
  do {                                                                \
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR((_q)->order.order); \
    (tw)->skey += ((_q)->slidingTime * factor);                       \
    (tw)->ekey = (tw)->skey + ((_q)->intervalTime - 1);               \
  } while (0)

148 149
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
150

H
hjxilinx 已提交
151
// todo move to utility
152
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
153

H
hjxilinx 已提交
154
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
155
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
156 157
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
158

159 160 161
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

162
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
163
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo);
164 165
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
166
static void buildTagQueryResult(SQInfo *pQInfo);
167

168
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
169
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
170

171
/**
 * Evaluate the column filters of the query against one row.
 *
 * For every filtered column the element at row `elemPos` is located in the
 * column's data buffer. The row is rejected when that element is NULL or when
 * none of the column's filters accepts it (filters of one column are OR-ed,
 * columns are AND-ed).
 *
 * @param pQuery   query carrying numOfFilterCols / pFilterInfo
 * @param elemPos  row position inside the filter columns' data buffers
 * @return true when the row passes the filters of every column
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;
    if (isNull(pElem, pFilterInfo->info.type)) {
      return false;
    }

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      // the same element is handed over as both value arguments of the filter callback
      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/**
 * Get the number of result rows produced so far, i.e. the largest numOfRes
 * among the output function contexts that are taken into account.
 *
 * @param pRuntimeEnv  runtime environment holding the query and its function contexts
 * @return the maximum numOfRes found (0 when none is positive)
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

225 226 227 228 229
/*
 * The number of results needs to be updated because the offset value was updated.
 *
 * Overwrites numOfRes of every output function context with `numOfRes`, except for
 * the ts, tag, tagprj and ts_dummy functions, which are left untouched. The previous
 * numOfRes of each updated context is asserted to be larger than the new value.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

H
Haojun Liao 已提交
245
/* Map a group index to a result id: 20000000 + groupIndex * 10000. */
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  int32_t base = 20000000;
  return base + (groupIndex * 10000);
}

/**
 * Check whether the group-by clause contains a normal (non-tag) column.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return true when at least one group-by column is flagged TSDB_COL_NORMAL;
 *         false for a NULL/empty expression or when no such column exists
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      //make sure the normal column locates at the second position if tbname exists in group by clause
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/**
 * Get the data type of the normal column used in the group-by clause.
 *
 * The first group-by column flagged TSDB_COL_NORMAL supplies the column id,
 * which is then looked up in pQuery->colList.
 *
 * @param pQuery        query whose column list is searched
 * @param pGroupbyExpr  group-by expression, must not be NULL
 * @return the column's type, or TSDB_DATA_TYPE_NULL when no normal group-by
 *         column exists or its id is not present in the column list
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // sentinel used while no normal group-by column has been found
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/**
 * Check whether the query combines a selectivity function with tag/ts dummy outputs.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true when at least one output is TSDB_FUNC_TAG_DUMMY or TSDB_FUNC_TS_DUMMY
 *         and at least one other output has the TSDB_FUNCSTATE_SELECTIVITY status bit
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

317 318 319 320 321 322 323 324 325 326 327
/**
 * Tell whether every output of the query is a plain projection.
 *
 * @return true when all select expressions are TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ
 *         (also true when the query has no output at all)
 */
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    isProjection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);

    // the first non-projection output settles the answer
    if (!isProjection) {
      return false;
    }

    ++idx;
  }

  return true;
}

328
/* A query is a ts-comp query when its first select expression is TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFuncId = pQuery->pSelectExpr[0].base.functionId;
  return firstFuncId == TSDB_FUNC_TS_COMP;
}
329

330 331 332
/**
 * Apply the LIMIT clause to the rows of the current result batch.
 *
 * When a positive limit is set and the rows returned so far plus the current
 * batch exceed it, the current batch is truncated so that the total equals the
 * limit and the query is marked QUERY_COMPLETED.
 *
 * @return true when the batch was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

/* Return true when any output of the query is a TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
/**
 * Tell whether the query output involves tag values.
 *
 * @return true when the only output is TSDB_FUNC_TS_COMP, or when any select
 *         expression refers to a column flagged as a tag; false otherwise
 */
static bool hasTagValOutput(SQuery* pQuery) {
  // a lone ts_comp output needs the tag value for the join filter
  if (pQuery->numOfOutput == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // otherwise look for a tag column among the outputs, stopping at the first hit
  bool found = false;
  for (int32_t i = 0; i < pQuery->numOfOutput && !found; ++i) {
    found = TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].base.colInfo.flag);
  }

  return found;
}

380 381 382 383 384 385 386 387
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
388
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
389
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
390 391
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
392 393
  } else {
    *pColStatis = NULL;
394
  }
395

H
Haojun Liao 已提交
396
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
397 398 399
    return false;
  }

400 401 402
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
403

404 405 406 407
  return true;
}

/**
 * Locate the window result identified by a key, creating it when necessary.
 *
 * The key is looked up in the hash list of the window result info. On a miss
 * during the master scan a new slot is appended (the result array is grown when
 * it is full) and registered in the hash list; on a miss in any other scan
 * nothing is created.
 *
 * Errors are reported through longjmp on pRuntimeEnv->env:
 *   - TSDB_CODE_QRY_OUT_OF_MEMORY when growing the array or creating a result fails
 *   - TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when size exceeds MAX_INTERVAL_TIME_WINDOW
 *
 * @param pRuntimeEnv     runtime environment (query, summary counters, jump buffer)
 * @param pWindowResInfo  window result container that is searched/extended
 * @param pData           key bytes
 * @param bytes           key length
 * @param masterscan      whether new windows may be created
 * @return the window result for the key, or NULL when it does not exist and
 *         masterscan is false
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // a capacity of 0 or 1 does not grow after truncation (e.g. 1 * 1.5 -> 1);
      // always make room for at least one more slot so the append below stays in bounds
      if (newCap <= pWindowResInfo->capacity) {
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCap * sizeof(SWindowResult)));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // account for the extra memory only once it has actually been obtained
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCap - pWindowResInfo->capacity);

      pWindowResInfo->pResult = (SWindowResult *)t;

      // zero the newly added slots before they are initialized
      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the correct time window according to the handled timestamp.
 *
 * The starting candidate is the current window of pWindowResInfo, or, when no
 * window is active yet (curIndex == -1), the window beginning at the stored
 * prevSKey. If `ts` lies outside the candidate, the window start is shifted by
 * whole multiples of slidingTime until the window covers `ts`. For ascending
 * queries the window end is clipped to the end of the query time range; the
 * start is never clipped because it serves as the window index value.
 *
 * @param pWindowResInfo  window result container providing the current window
 * @param ts              timestamp that must fall into the returned window
 * @param pQuery          query providing intervalTime, slidingTime, order and range
 * @return the time window containing ts
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SWindowResult* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = GET_TIMEWINDOW(pWindowResInfo, pWindowRes);
  }

  if (w.skey > ts || w.ekey < ts) {
    int64_t st = w.skey;

    // move backwards by the number of sliding steps needed (rounded up) to start at or before ts
    if (st > ts) {
      st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // move forwards by the number of sliding steps needed (rounded up) so the window reaches ts
    int64_t et = st + pQuery->intervalTime - 1;
    if (et < ts) {
      st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    w.skey = st;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  assert(ts >= w.skey && ts <= w.ekey);

  return w;
}

/**
 * Assign a position (page id + row id) in the disk-based result buffer to a
 * window result that does not have one yet.
 *
 * The last page registered for `sid` is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when `sid` has no page yet) a new page
 * is requested from the result buffer.
 *
 * @param pWindowRes        window result to place; untouched if already placed
 * @param pResultBuf        disk-based result buffer
 * @param sid               id under which the pages are grouped in the buffer
 * @param numOfRowsPerPage  row capacity of one page
 * @return 0 on success (including the already-placed case), -1 when no page is available
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    if (pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = (int32_t)(pData->num++);

    assert(pWindowRes->pos.pageId >= 0);
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
554
                                       STimeWindow *win, bool masterscan, bool* newWind) {
555 556
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
557

558 559
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
560
  if (pWindowRes == NULL) {
561 562 563
    *newWind = false;

    return masterscan? -1:0;
564
  }
565

566
  *newWind = true;
H
Haojun Liao 已提交
567

568 569 570
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
571
    if (ret != TSDB_CODE_SUCCESS) {
572 573 574
      return -1;
    }
  }
575

576
  // set time window for current result
577
  pWindowRes->skey = win->skey;
578

H
Haojun Liao 已提交
579
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
580 581 582
  return TSDB_CODE_SUCCESS;
}

583
/* Return the `closed` flag of the window result stored in `slot`; the slot must be in [0, size). */
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);
  return pWindowResInfo->pResult[slot].closed;
}

H
Haojun Liao 已提交
588
/**
 * Count how many rows, starting at `pos` and moving in scan direction, belong
 * to the window that ends at `ekey`.
 *
 * Ascending order searches pData[pos .. numOfRows), descending order searches
 * pData[0 .. pos]. The row returned by the search is counted as well when its
 * timestamp equals ekey exactly.
 * NOTE(review): relies on searchFn returning the index (relative to the searched
 * range) of the boundary row for ekey, or a negative value on failure -- confirm
 * against the __block_search_fn_t implementations.
 *
 * @param numOfRows  number of rows in the block
 * @param searchFn   search routine over the timestamp array
 * @param ekey       end key of the window
 * @param pos        start position inside the block
 * @param order      TSDB_ORDER_ASC or descending
 * @param pData      timestamp column of the block
 * @return number of rows to step forward; asserted to be positive
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
619
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
620
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
621
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
622
    return pWindowResInfo->size;
623
  }
624

625
  // no qualified results exist, abort check
626
  int32_t numOfClosed = 0;
627

628
  if (pWindowResInfo->size == 0) {
629
    return pWindowResInfo->size;
630
  }
631

632
  // query completed
H
hjxilinx 已提交
633 634
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
635
    closeAllTimeWindow(pWindowResInfo);
636

637 638 639 640
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
641
    int64_t skey = TSKEY_INITIAL_VAL;
642

643 644
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
645
      if (pResult->closed) {
646
        numOfClosed += 1;
647 648
        continue;
      }
649

650 651 652
      TSKEY ekey = pResult->skey + pWindowResInfo->interval;
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
653 654
        closeTimeWindow(pWindowResInfo, i);
      } else {
655
        skey = pResult->skey;
656 657 658
        break;
      }
    }
659

660
    // all windows are closed, set the last one to be the skey
661
    if (skey == TSKEY_INITIAL_VAL) {
662 663 664 665 666
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
667

668
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].skey;
669

670 671
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
672
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
673
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
674

675
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
676
    } else {
677
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
678
             numOfClosed);
679 680
    }
  }
681

682 683 684 685 686
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
687

688
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
689
  return numOfClosed;
690 691 692
}

/**
 * Count the rows of a data block that fall into the window ending at `ekey`,
 * starting from `startPos` and moving in query direction.
 *
 * When the window ends inside the block the count comes from
 * getForwardStepsInBlock; otherwise all remaining rows of the block in scan
 * direction are counted. With updateLastKey set, the lastKey of the current
 * table (pQuery->current) is advanced one step past the last counted row, or
 * past the block boundary when the rest of the block is consumed.
 *
 * @param pQuery          query (order and current table)
 * @param pDataBlockInfo  block description (rows, time range)
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        start position, must be within [0, rows)
 * @param ekey            end key of the window
 * @param searchFn        search routine over the timestamp column
 * @param updateLastKey   whether to update the current table's lastKey
 * @return number of rows in the window; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

732
/**
 * Apply every output function of the query to a run of rows of the current block.
 *
 * The functions are executed only during the master scan or when `closed` is true.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param closed       status flag of the target time window supplied by the caller
 * @param pWin         time window being processed; its skey is handed to each context
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      // the start offset is the lowest row position of the run, regardless of scan direction
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsBuf[offset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      if (forwardStep != numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
}

761
/**
 * Apply every output function of the query to the single row at `offset`.
 *
 * The functions are executed only during the master scan or when `closed` is true.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param closed       status flag of the target time window supplied by the caller
 * @param pWin         time window being processed; its skey is handed to each context
 * @param offset       row position passed to the per-row function implementation
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }
  }
}

H
Haojun Liao 已提交
777 778
/**
 * Advance *pNext to the next time window and find where it starts in the
 * current data block.
 *
 * The window is first moved by one sliding step. If it lies completely beyond
 * the block in scan direction, -1 is returned. Otherwise the start position is
 * derived from prevPosition for tumbling windows (slidingTime == intervalTime),
 * or searched by key. If the row found lies beyond the window (the window
 * covers no data), the window is moved further by whole sliding steps towards
 * that row.
 *
 * @param pRuntimeEnv     runtime environment
 * @param pNext           [in/out] current window on entry, next qualified window on return
 * @param pDataBlockInfo  block description (rows, time range)
 * @param primaryKeys     timestamp column of the block
 * @param searchFn        search routine over the timestamp column
 * @param prevPosition    last row position of the previous window, or -1 if unknown
 * @return start position of the next window inside the block, or -1 when the
 *         next window is not in the current block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  // next time window is not in current block
  if ((pNext->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNext->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }

  // key to search for: the window edge facing the scan direction, bounded by the query start key
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNext->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    startKey = pNext->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  int32_t startPos = 0;
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime && prevPosition != -1) {
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    startPos = prevPosition + factor;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNext->ekey) {
    TSKEY next = primaryKeys[startPos];

    pNext->ekey += ((next - pNext->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNext->skey = pNext->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNext->skey) {
    TSKEY next = primaryKeys[startPos];

    pNext->skey -= ((pNext->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNext->ekey = pNext->skey + pQuery->intervalTime - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
830
/*
 * Pick the scan end key for a time window, clamped to the end of the query
 * range (pQuery->window.ekey).
 *
 * Ascending scans use the window's ekey, capped so it does not exceed the
 * query end key. Descending scans use the window's skey, raised so it does
 * not fall below the query end key.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
847 848
/*
 * Return the raw data buffer of the column whose id equals colId, or NULL
 * when the block holds no such column.
 *
 * todo: the lookup is a linear scan; replace with a binary search.
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
862
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
863 864 865
  if (pDataBlock == NULL) {
    return NULL;
  }
866

H
Haojun Liao 已提交
867
  char *dataBlock = NULL;
H
Haojun Liao 已提交
868
  SQuery *pQuery = pRuntimeEnv->pQuery;
869
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
870

871
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
872
  if (functionId == TSDB_FUNC_ARITHM) {
873
    sas->pArithExpr = &pQuery->pSelectExpr[col];
874

875 876 877 878 879 880
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
881

882 883 884 885
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
886

H
Haojun Liao 已提交
887
    if (sas->data == NULL) {
H
Haojun Liao 已提交
888
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
889 890 891
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

892
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
893
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
894
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
895
      SColumnInfo *pColMsg = &pQuery->colList[i];
896

897 898 899 900 901 902 903 904
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
905

906
      assert(dataBlock != NULL);
907
      sas->data[i] = dataBlock;  // start from the offset
908
    }
909

910
  } else {  // other type of query function
911
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
912
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
913 914 915 916 917
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
918 919
    } else {
      dataBlock = NULL;
920 921
    }
  }
922

923 924 925 926
  return dataBlock;
}

/**
H
Haojun Liao 已提交
927
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
928 929
 * @param pRuntimeEnv
 * @param forwardStep
930
 * @param tsCols
931 932 933 934 935
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
936
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
937 938
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
939
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
940 941
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

942 943
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
944
  if (pDataBlock != NULL) {
945
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
946
    tsCols = (TSKEY *)(pColInfo->pData);
947
  }
948

949
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
950
  if (sasArray == NULL) {
H
Haojun Liao 已提交
951
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
952 953
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
954

955
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
956
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
957
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
958
  }
959

960
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
961 962
  if (QUERY_IS_INTERVAL_QUERY(pQuery)/* && tsCols != NULL*/) {
    TSKEY ts = TSKEY_INITIAL_VAL;
963

H
Haojun Liao 已提交
964 965 966 967 968 969 970 971
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
972
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
973 974
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
975
      taosTFree(sasArray);
H
hjxilinx 已提交
976
      return;
977
    }
978

H
Haojun Liao 已提交
979 980 981
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

982
    if (hasTimeWindow) {
H
Haojun Liao 已提交
983
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
984
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
985

986
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
987
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
988
    }
989

990 991
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
992

993
    while (1) {
H
Haojun Liao 已提交
994 995
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
996 997 998
      if (startPos < 0) {
        break;
      }
999

1000
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1001
      hasTimeWindow = false;
H
Haojun Liao 已提交
1002 1003
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1004 1005
        break;
      }
1006

1007 1008 1009 1010 1011
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1012
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1013

1014 1015
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1016
    }
1017

1018 1019 1020 1021 1022 1023 1024
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1025
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1026
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1027 1028 1029 1030 1031
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1032

1033 1034 1035 1036
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1037

S
Shengliang Guan 已提交
1038
    taosTFree(sasArray[i].data);
1039
  }
1040

S
Shengliang Guan 已提交
1041
  taosTFree(sasArray);
1042 1043 1044 1045 1046 1047
}

/*
 * Select (creating it when necessary) the result slot of the group identified
 * by the group-by column value at pData and make it the current output buffer.
 *
 * Returns -1 when the value is null, when no window result can be obtained for
 * the key, or when a new result buffer page cannot be added; otherwise
 * TSDB_CODE_SUCCESS.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // ignore the null value
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t        groupResultId = 1;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // integer view of the group key; stays -1 for types that are not listed here
  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pWindowRes == NULL) {
    return -1;
  }

  pWindowRes->skey = groupKey;
  assert(pRuntimeEnv->windowResInfo.interval == 0);

  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pWindowRes, pResultBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1083
/*
 * Find the data of the group-by column inside the data block.
 *
 * Tag columns in the group-by list are skipped. For each remaining column,
 * *type and *bytes are filled from the query column list, then the data block
 * is searched for a column with the same id and its data pointer is returned.
 * Returns NULL when no group-by column is present in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, k);
    if (pGroupCol->flag == TSDB_COL_TAG) {
      continue;
    }

    int32_t groupColId = pGroupCol->colId;

    // position of the group-by column in the query column list
    int16_t schemaIdx = -1;
    int32_t i = 0;
    while (i < pQuery->numOfCols) {
      if (pQuery->colList[i].colId == groupColId) {
        schemaIdx = (int16_t)i;
        break;
      }

      ++i;
    }

    assert(schemaIdx >= 0 && schemaIdx < pQuery->numOfCols);

    *type = pQuery->colList[schemaIdx].type;
    *bytes = pQuery->colList[schemaIdx].bytes;

    /*
     * the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);
    for (int32_t j = 0; j < numOfBlockCols; ++j) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, j);
      if (pGroupCol->colId == pColData->info.colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the current input with the current element of
 * the timestamp comparison buffer (pRuntimeEnv->pTSBuf).
 *
 * Returns
 *   TS_JOIN_TAG_NOT_EQUALS  when the tag of the first context differs from the
 *                           tag of the buffer element,
 *   TS_JOIN_TS_NOT_EQUALS   when the row has not yet reached the timestamp of
 *                           the buffer element in scan direction,
 *   TS_JOIN_TS_EQUAL        when both timestamps match.
 *
 * A row that lies beyond the buffer element in scan direction is not expected
 * and trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, &elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is a variant object (see tVariantCompare above); print its integer
  // member instead of passing the whole object to a 64-bit integer conversion.
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, (uint64_t)elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function `functionId` has to be invoked for the current
 * scan. Rules are evaluated in this order:
 *   1. TSDB_FUNC_TS always runs.
 *   2. Completed results and the TAG_DUMMY / TS_DUMMY functions never run.
 *   3. first / first_dst run only for ascending queries.
 *   4. last / last_dst run only when param[0].i64Key equals the query order.
 *   5. Everything else runs unless this is the reverse (supplementary) scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (pResInfo->complete || isDummy) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, none of the remaining functions need to be executed
  return IS_REVERSE_SCAN(pRuntimeEnv) ? false : true;
}

1190 1191
/*
 * Apply the output functions of the query to the data block one row at a time.
 *
 * Each row is first checked against the timestamp comparison buffer (when
 * pRuntimeEnv->pTSBuf is set) and the column filters. A surviving row is then
 * fed either to every time window that contains its timestamp (interval
 * queries) or, for the remaining queries, to the per-row function xFunctionF,
 * after the group result buffer has been selected when grouping by a normal
 * column.
 *
 * After the scan the last key of the current table is set from the last
 * visited row position, and the per-output arithmetic support data is freed.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  // the first column is used as the timestamp column only if it really has the timestamp type
  SColumnInfoData* pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY*) pFirstCol->pData;
  }

  bool groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyColumnValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);

      // null data, too many state code; or no result slot for this window
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      // feed the row to every following window that still contains its timestamp
      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool ascending = QUERY_IS_ASC_QUERY(pQuery);
        if ((nextWin.skey > pQuery->window.ekey && ascending) ||
            (nextWin.skey < pQuery->window.ekey && !ascending)) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyColumnValue) {
        char *val = groupbyColumnData + bytes * offset;

        // null data, too many state code
        if (setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes) != TSDB_CODE_SUCCESS) {
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    TSKEY blockEdge = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    item->lastKey = blockEdge + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a data pointer array
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      taosTFree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1353
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1354
  SQuery *pQuery = pRuntimeEnv->pQuery;
1355

H
hjxilinx 已提交
1356 1357
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1358

H
Haojun Liao 已提交
1359
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1360
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1361
  } else {
1362
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1363
  }
1364

1365
  // update the lastkey of current table
1366
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1367
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1368

1369
  // interval query with limit applied
1370
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1371
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1372 1373
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1374
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1375

1376 1377 1378 1379
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1380

1381 1382 1383
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1384

1385 1386 1387
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1388 1389 1390 1391 1392

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1393
    }
1394
  }
1395

1396
  return numOfRes;
1397 1398
}

H
Haojun Liao 已提交
1399
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1400
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
1401

1402 1403
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1404

1405
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1406
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1407
  pCtx->aInputElemBuf = inputData;
1408

1409
  if (tpField != NULL) {
H
Haojun Liao 已提交
1410
    pCtx->preAggVals.isSet  = true;
1411 1412
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1413 1414 1415
  } else {
    pCtx->preAggVals.isSet = false;
  }
1416

H
Haojun Liao 已提交
1417 1418
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1419 1420 1421
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1422

1423 1424
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1425
    pCtx->ptsList = tsCol;
1426
  }
1427

1428 1429 1430 1431 1432
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1433
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1434
    /*
H
Haojun Liao 已提交
1435
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1436 1437 1438 1439 1440 1441 1442 1443 1444 1445
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1446

1447 1448
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1449 1450 1451 1452 1453 1454
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1455 1456
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1457
    pInterpInfo->type = (int8_t)pQuery->fillType;
1458 1459
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1460

1461 1462 1463 1464 1465 1466 1467
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1468
  }
1469

1470 1471 1472 1473 1474 1475
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1476
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1477 1478 1479
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1480
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1481 1482 1483 1484 1485 1486
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1487
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1488 1489
  SQuery* pQuery = pRuntimeEnv->pQuery;

1490
  if (isSelectivityWithTagsQuery(pQuery)) {
1491
    int32_t num = 0;
1492
    int16_t tagLen = 0;
1493

1494
    SQLFunctionCtx *p = NULL;
1495
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1496 1497 1498
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1499

1500
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1501
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1502

1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1516 1517 1518 1519 1520
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1521
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1522
    }
1523
  }
H
Haojun Liao 已提交
1524 1525

  return TSDB_CODE_SUCCESS;
1526 1527
}

H
Haojun Liao 已提交
1528 1529
/*
 * Hand each output column its slice of the shared buffer `buf`: column i gets
 * pQuery->pSelectExpr[i].interBytes bytes, laid out back to back in output
 * column order.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char   *cursor = buf;
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t interBytes = pQuery->pSelectExpr[i].interBytes;
    setResultInfoBuf(&pResultInfo[i], interBytes, isStableQuery, cursor);

    cursor += interBytes;
    ++i;
  }
}

1538
/**
 * Allocate and initialise the per-output function contexts and the intermediate
 * result descriptors of the runtime environment.
 *
 * For every output expression the input/output type and width, the scan order, the
 * function id and the function parameters are copied into the matching SQLFunctionCtx,
 * and the byte offset of every output column is recorded in pRuntimeEnv->offset.
 *
 * @param pRuntimeEnv runtime environment; pRuntimeEnv->pQuery must already be set
 * @param order       order value stored in param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if an allocation or the
 *         tag-column setup failed (everything allocated here is released in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // one allocation holds the SResultInfo array followed by the intermediate buffers
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // resolve the input type/width from the tag list, the constant argument or the column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions require the first output column to be a timestamp function
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffers start right after the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameter variants created above before dropping the context array;
  // otherwise their payloads would be unreachable once pCtx is freed. The array comes
  // from calloc, so contexts that were never filled have numOfParams == 0.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/**
 * Release the resources held by the runtime environment: the time-window info, the
 * function contexts (parameter variants, tag variant, tag context list), the result
 * info block, the fill info, the result buffer, both tsdb query handles and the ts buffer.
 *
 * Does nothing when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = pRuntimeEnv->pCtx + col;

      int32_t k = 0;
      while (k < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[k]);
        ++k;
      }

      tVariantDestroy(&pFuncCtx->tag);
      taosTFree(pFuncCtx->tagInfo.pTagCtxList);
    }

    // the result info block is only released together with the context array
    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1677
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1678

H
Haojun Liao 已提交
1679
/** Mark the query as cancelled by storing TSDB_CODE_TSC_QUERY_CANCELLED in its code field. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1680

H
Haojun Liao 已提交
1681 1682 1683
/**
 * Decide whether the query is treated as a fixed-output query.
 *
 * Interval queries never are. Top/bottom and group-by-normal-column queries always are.
 * Otherwise the query qualifies as soon as one output function, ignoring the leading
 * ts_comp projection and the timestamp functions, is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      funcId = pFunc->functionId;

    // the ts_comp function: a projection of the primary timestamp in the first output slot
    bool isTsComp = (idx == 0) && (funcId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                    (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool isTsFunc = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[funcId].nStatus)) {
      return true;
    }
  }

  return false;
}

1713
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1714
static bool isPointInterpoQuery(SQuery *pQuery) {
1715
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1716
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1717
    if (functionID == TSDB_FUNC_INTERP) {
1718 1719 1720
      return true;
    }
  }
1721

1722 1723 1724 1725
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1726
static bool isSumAvgRateQuery(SQuery *pQuery) {
1727
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1728
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1729 1730 1731
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1732

1733 1734 1735 1736 1737
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1738

1739 1740 1741
  return false;
}

H
hjxilinx 已提交
1742
/** Return true when at least one output expression uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++idx;
  }

  return false;
}

H
hjxilinx 已提交
1753
/**
 * Tell whether a second scan in the opposite direction is required.
 *
 * Timestamp and tag functions are ignored. A first/first_dst function demands a reverse
 * scan when the query is not ascending. The first last/last_dst function encountered
 * decides the answer on its own: a reverse scan is needed when the order stored in its
 * first argument differs from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;

    bool ignorable = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TS_DUMMY) || (funcId == TSDB_FUNC_TAG);
    if (ignorable) {
      continue;
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST) || (funcId == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (funcId == TSDB_FUNC_LAST) || (funcId == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t funcOrder = (int32_t)pQuery->pSelectExpr[idx].base.arg->argValue.i64;
      return funcOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1772

H
Haojun Liao 已提交
1773 1774 1775 1776
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1777 1778
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1779 1780 1781
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1782 1783 1784 1785 1786

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
        (!(functionId == TSDB_FUNC_PRJ && pExprInfo->base.colInfo.flag == TSDB_COL_UDC))) {
H
hjxilinx 已提交
1787 1788 1789
      return false;
    }
  }
1790

H
hjxilinx 已提交
1791 1792 1793
  return true;
}

1794 1795
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1796
/**
 * Compute the interval window that key falls into and store it in win.
 *
 * win->skey is obtained from taosGetIntervalStartTimestamp(). win->ekey is the last
 * timestamp of that interval, or INT64_MAX when keyFirst is so close to INT64_MAX
 * that adding one interval to it would overflow.
 *
 * Requires keyFirst <= key <= keyLast and slidingTime <= intervalTime (asserted).
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit, pQuery->precision);

  /*
   * if the realSkey > INT64_MAX - pQuery->intervalTime, the query duration between
   * realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges.
   */
  if (keyFirst > (INT64_MAX - pQuery->intervalTime)) {
    assert(keyLast - keyFirst < pQuery->intervalTime);
    win->ekey = INT64_MAX;
    return;
  }

  win->ekey = win->skey + pQuery->intervalTime - 1;
}

/**
 * Set pQuery->checkBuffer.
 *
 * The flag is cleared for top/bottom and group-by-normal-column queries. For any other
 * query it is set to 1 only when there is at least one non-timestamp output function
 * and every non-timestamp output function is a multi-output function.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[funcId].nStatus);
    if (!allMultiOutput) {
      break;  // one single-output function is enough to clear the flag
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1839
bool colIdCheck(SQuery *pQuery) {
1840 1841
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1842
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1843
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1844 1845 1846
      return false;
    }
  }
1847

1848 1849 1850 1851 1852 1853
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
/**
 * Return true when every output function, apart from the ts, ts_dummy, tag and
 * tag_dummy functions, is either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;

    bool isAuxiliary = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TS_DUMMY) || (funcId == TSDB_FUNC_TAG) ||
                       (funcId == TSDB_FUNC_TAG_DUMMY);
    bool isWanted = (funcId == functId) || (funcId == functIdDst);

    if (!isAuxiliary && !isWanted) {
      return false;
    }
  }

  return true;
}

/** True when the query consists solely of first/first_dst (plus ts/tag helper) functions. */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/** True when the query consists solely of last/last_dst (plus ts/tag helper) functions. */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1874 1875 1876 1877 1878 1879
// todo refactor, add iterator
/**
 * Swap the start and end key of the query window of every table in every table group,
 * and copy the new start key into lastKey of the entry at the same position in
 * pQInfo->tableGroupInfo.pGroupList.
 */
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pTableInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray* pKeyInfoList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pTableInfoList);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo* pTableInfo = (STableQueryInfo*) taosArrayGetP(pTableInfoList, t);
      SWAP(pTableInfo->win.skey, pTableInfo->win.ekey, TSKEY);

      // keep the key info of the same table in step with the swapped window
      STableKeyInfo* pKeyInfo = taosArrayGet(pKeyInfoList, t);
      pKeyInfo->lastKey = pTableInfo->win.skey;
    }
  }
}

H
Haojun Liao 已提交
1892 1893 1894
/**
 * Adjust the scan order (and, where needed, the query time range) for query types
 * whose result only depends on one end of the time range.
 *
 *  - last_row query: forced to ascending order, the window is normalised to skey <= ekey.
 *  - interp query without interval: forced to ascending order.
 *  - non-interval query made only of first (resp. last) functions: forced to ascending
 *    (resp. descending) order; the per-table windows are swapped as well.
 *  - interval query on a super table made only of first (resp. last) functions: forced
 *    to ascending (resp. descending) order; only the query window is swapped.
 *
 * Every other query is left untouched.
 *
 * @param pQInfo      query handle; also the address printed in the debug log
 * @param stableQuery true when the query runs against a super table
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // in case of point-interpolation query, use asc order scan
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    // log the query handle itself rather than an address derived from pQuery
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->intervalTime == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/**
 * Pick the initial number of result pages: 128 for a group-by-normal-column query,
 * max(number of tables, 16) for an interval query, and 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t pages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  } else {      // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(pages > 0);
  return pages;
}

1990 1991 1992
/**
 * Compute the row width and the page size of the intermediate result buffer.
 *
 * @param ps      out: page size; starts at DEFAULT_INTERN_BUF_PAGE_SIZE and is doubled
 *                until a page can hold at least two rows besides the tFilePage header
 * @param rowsize out: pQuery->rowSize scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT
 *
 * Also stores the resulting rows-per-page value in pRuntimeEnv->numOfRowsPerPage.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));
  int32_t overhead = sizeof(tFilePage);

  // one page contains at least two rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * 2) > (*ps) - overhead; *ps = (*ps << 1u)) {
    // grow only; nothing else to do per step
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2006
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2007

H
Haojun Liao 已提交
2008 2009 2010 2011
/**
 * Use the block statistics to decide whether the data block has to be loaded.
 *
 * Returns true when no statistics are available, when there is neither a column filter
 * nor a top/bottom function, when a filtered column has no statistics or is of
 * binary/nchar type, or when any filter accepts the [min, max] range of its column.
 * For a top/bottom query that was not accepted by a filter, the decision is delegated
 * to topbot_datablock_filter() for the first top/bottom output. Otherwise false.
 *
 * @param pDataStatis per-column statistics of the block, may be NULL
 * @param pCtx        function contexts, indexed by output column
 * @param numOfRows   number of rows in the block
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery)) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[k];

    // locate the statistics entry of the filtered column
    int32_t statIdx = -1;
    for (int32_t c = 0; c < pQuery->numOfCols && statIdx == -1; ++c) {
      if (pDataStatis[c].colId == pColFilter->info.colId) {
        statIdx = c;
      }
    }

    // no statistics data, or pre-filter is not supported on binary/nchar data type
    if (statIdx == -1 || !IS_PREFILTER_TYPE(pColFilter->info.type)) {
      return true;
    }

    SDataStatis* pColStatis = &pDataStatis[statIdx];

    // all points in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      continue;
    }

    float floatMin = 0;
    float floatMax = 0;
    char *pMin = (char *)&pColStatis->min;
    char *pMax = (char *)&pColStatis->max;

    // float columns are filtered on float values converted from the stored doubles
    if (pColFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      floatMin = (float)(*(double *)(&pColStatis->min));
      floatMax = (float)(*(double *)(&pColStatis->max));

      pMin = (char *)&floatMin;
      pMax = (char *)&floatMax;
    }

    for (int32_t f = 0; f < pColFilter->numOfFilters; ++f) {
      if (pColFilter->pFilters[f].fp(&pColFilter->pFilters[f], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
      if (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], funcId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2073 2074 2075 2076 2077 2078 2079 2080
/**
 * Check whether an interval-window boundary falls inside the data block, i.e. whether
 * the block is not completely contained in a single time window.
 *
 * The first window is aligned on the block start (ascending query) or the block end
 * (descending query); the following windows are walked with GET_NEXT_TIMEWINDOW until
 * they leave the block range.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, sk, ek, &w);
    assert(w.ekey >= pBlockInfo->window.skey);

    // the first window ends before the block does
    if (w.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.skey > pBlockInfo->window.ekey) {
        return false;
      }

      assert(w.ekey > pBlockInfo->window.ekey);

      // w.skey <= block end is already known here
      if (w.skey > pBlockInfo->window.skey) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, sk, ek, &w);
  assert(w.skey <= pBlockInfo->window.ekey);

  // the first window starts after the block does
  if (w.skey > pBlockInfo->window.skey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &w);
    if (w.ekey < pBlockInfo->window.skey) {
      return false;
    }

    assert(w.skey < pBlockInfo->window.skey);

    // w.ekey >= block start is already known here
    if (w.ekey < pBlockInfo->window.ekey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2122
/**
 * Load only as much of the current data block as the query needs.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when column filters or a ts buffer exist, or
 * when the block spans more than one interval window; otherwise it is the union of
 * what the data-requirement callback of every output function reports.
 *
 *  - nothing needed:    the block is counted as discarded, nothing is loaded
 *  - statistics needed: the block statistics are loaded; the data itself is loaded
 *                       only when no statistics exist
 *  - all needed:        statistics are loaded first and used to pre-filter the block,
 *                       then the data is loaded
 *
 * @param pStatis    out: block statistics (left as returned by the tsdb layer)
 * @param pDataBlock out: loaded block data, set only when the data was retrieved
 * @return BLK_DATA_DISCARD when the statistics pre-filter rejected the block,
 *         TSDB_CODE_SUCCESS otherwise
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t status = 0;
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      status = BLK_DATA_ALL_NEEDED;
    }

    if (status != BLK_DATA_ALL_NEEDED) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;

        status |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if ((status & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;  // nothing can raise the requirement any further
        }
      }
    }
  }

  if (status == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (status == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // the failure is deliberately not propagated: a missing statistics result is
      // handled below by loading the data block instead
      //        return DISK_DATA_LOAD_FAILED;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(status == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // the status is ignored here; the pre-filter below is handed whatever *pStatis holds
    }

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      return BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2190
/**
 * Binary search in an ascending array of timestamps.
 *
 * @param pValue array of num TSKEY values
 * @param num    number of elements; -1 is returned when num <= 0
 * @param key    timestamp to look for
 * @param order  TSDB_ORDER_ASC or TSDB_ORDER_DESC (asserted)
 * @return the index of key when it is present; otherwise, for TSDB_ORDER_DESC, the
 *         index of the last element smaller than key (-1 if there is none) and, for
 *         TSDB_ORDER_ASC, the index of the first element larger than key (-1 if there
 *         is none)
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }

      int32_t mid = lo + ((hi - lo + 1) >> 1);

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }

    if (key == keys[hi]) {
      return hi;
    }

    if (key > keys[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    int32_t mid = lo + ((hi - lo + 1) >> 1);

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265
/**
 * Resize every output column buffer so that it can hold capacity rows.
 *
 * Nothing happens when capacity is smaller than the current capacity. On allocation
 * failure the function does not return: it longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY. Each context's output pointer is reset to the start
 * of its column data.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;
    assert(bytes > 0 && capacity > 0);

    // keep the old pointer until realloc has succeeded
    char *pNew = realloc(pQuery->sdata[col], bytes * capacity + sizeof(tFilePage));
    if (pNew == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[col] = (tFilePage *)pNew;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2281 2282 2283
/**
 * In case of prj/diff query, ensure the output buffer is sufficient to accommodate
 * the results of current block.
 *
 * Applies only when the query is not an interval query, not a group-by-normal-column
 * query, not a fixed-output query and not a ts_comp query. When the free room is
 * smaller than the block, every column buffer is grown by exactly the missing rows,
 * the area behind the existing rows is zeroed, and the context output pointers are
 * moved behind the existing rows. On allocation failure the function longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;
    assert(bytes > 0 && newSize > 0);

    char *pNew = realloc(pQuery->sdata[col], bytes * newSize + sizeof(tFilePage));
    if (pNew == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that are already in the buffer
    memset(pNew + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[col] = (tFilePage *)pNew;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data + pRec->rows * bytes;

    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM || funcId == TSDB_FUNC_DIFF) {
      // these functions write their timestamps into the first output column
      pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340
/*
 * Set up the first time window of an interval query from the given data block.
 *
 * Nothing happens unless the query is an interval query and no previous window
 * start key has been recorded yet (prevSKey still holds TSKEY_INITIAL_VAL).
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pResInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pResInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow aligned = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &aligned);
    pResInfo->startTime = aligned.skey;
  } else {
    // descending scan: the first window is aligned on the end key of the block,
    // while the recorded start time stays at the start key of the query range
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &aligned);
    pResInfo->startTime = pQuery->window.skey;
  }

  pResInfo->prevSKey = aligned.skey;
}

2341 2342
/*
 * Scan the data blocks delivered by the query handle of the current scan stage
 * and apply the query functions on each of them.
 *
 * The loop stops when the handle has no more blocks, or when the query status
 * becomes QUERY_RESBUF_FULL or QUERY_COMPLETED. A killed query or a non-zero
 * terrno aborts the query through longjmp() on pRuntimeEnv->env.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  // the master scan and the supplementary scan use different query handles
  TsdbQueryHandleT pQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  } else {
    pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // for prj/diff queries the output buffer must be able to hold the results of the whole block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t loadStatus = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis, &pDataBlock);
    if (loadStatus == BLK_DATA_DISCARD) {
      // the block is skipped: move the last key over the far edge of the block
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }
      continue;
    }

    if (terrno != TSDB_CODE_SUCCESS) {  // loading the data block failed, abort the query
      longjmp(pRuntimeEnv->env, terrno);
    }

    // the start position can not be moved into tableApplyFunctionsOnBlock due to the limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // the query pauses here when the output buffer is full or the limit/offset has been satisfied
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // the scan did not stop on a full result buffer, hence the query is completed
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2418
/*
 * Load one tag value of pTable into the variant `tag`.
 *
 * The previous content of `tag` is destroyed first. For the table-name pseudo
 * column the table name is stored as binary; for any other tag column the
 * value is fetched from the table, and `tag` is marked as NULL type when the
 * value is missing or null.
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char *name = tsdbGetTableName(pTable);
    assert(name != NULL);

    tVariantCreateFromBinary(tag, varDataVal(name), varDataLen(name), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char *tagVal = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
  if (tagVal == NULL || isNull(tagVal, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  bool isVarLenType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);
  if (isVarLenType) {
    // the length is taken from the variable-length header of the value
    tVariantCreateFromBinary(tag, varDataVal(tagVal), varDataLen(tagVal), type);
  } else {
    tVariantCreateFromBinary(tag, tagVal, bytes, type);
  }
}

2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462
/*
 * Linear search of the tag column list for the column with the given id.
 * Returns the first matching entry, or NULL when no entry matches.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo *pFound = NULL;
  for (int32_t idx = 0; idx < numOfTags && pFound == NULL; ++idx) {
    if (pTagColList[idx].colId == colId) {
      pFound = &pTagColList[idx];
    }
  }

  return pFound;
}

2463
/*
 * Set the tag values of pTable in the function contexts of the query.
 *
 * - A query whose only output is TSDB_FUNC_TS_COMP takes the tag column id from
 *   the first argument of the expression and stores the tag in pCtx[0].
 * - Otherwise, every output expression flagged as a tag column receives its tag
 *   value. In addition, when a TS buffer exists and the first expression is a
 *   TS/PRJ function on the primary timestamp column, the join tag named by the
 *   first argument is stored in pCtx[0].
 *
 * The tag column named by the expression argument must exist in
 * pQuery->tagColList; this is asserted before the column info is used.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
    assert(pColInfo != NULL);  // the lookup returns NULL for an unknown column id

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
  } else {
    // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];

      // only expressions on tag columns carry a tag value
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        continue;
      }

      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }

    // set the join tag for first column
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
        pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      assert(pFuncMsg->numOfParams == 1);

      int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
      SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
      assert(pColInfo != NULL);  // the lookup returns NULL for an unknown column id

      doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
          pRuntimeEnv->pCtx[0].tag.i64Key);
    }
  }
}

/*
 * Merge one window result into the merge output buffers.
 *
 * When mergeFlag is false a new output row is started: the output position of
 * every column is advanced by one row and the function is re-initialized.
 * When mergeFlag is true the window result is merged into the current row.
 * In both cases the distributed merge function of every output column is
 * invoked, except for TSDB_FUNC_TAG_DUMMY columns.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  // first pass: prepare the context of every output column
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf = pc->aOutputBuf + pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[funcId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes, pPage);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // tag columns: the tag value is rebuilt from the input buffer
    tVariantDestroy(&pc->tag);

    int32_t outType = pc->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // second pass: run the merge function of every column but the dummy tag columns
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtx[col]);
    }
  }
}

2550
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2619
/*
 * Debug helper: dump numOfRows rows of the intermediate result to stdout,
 * one line per row and one tab-terminated field per output column.
 * Columns whose type is not handled by the switch print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];

      // position of the value of the current row within the column buffer
      char *cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY:
          printBinaryData(pExpr->base.functionId, cell, pExpr->type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2654 2655 2656
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2657 2658 2659 2660 2661
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2662

2663 2664
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2665

2666 2667
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2668

2669 2670 2671 2672
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2673

2674 2675 2676 2677
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2678

H
hjxilinx 已提交
2679
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2680
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2681
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2682

H
Haojun Liao 已提交
2683
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2684
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2685

H
hjxilinx 已提交
2686
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2687
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2688
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2689

H
Haojun Liao 已提交
2690
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2691
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2692

2693 2694 2695
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2696

2697 2698 2699
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2700
/*
 * Merge the results of the table groups, starting at pQInfo->groupIndex, until
 * one group produces at least one result page or all groups are consumed.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise. The
 * elapsed time is added to the first-stage merge time of the query summary.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // the group produced at least one result page: hand the results out
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  bool allGroupsDone = (pQInfo->groupIndex == numOfGroups);
  if (allGroupsDone && pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the merged results of the current group from the result pages into the
 * output buffers of the query (pQuery->sdata), at most pQuery->rec.capacity rows.
 *
 * When all pages of the current group have been returned, the next group is
 * merged first. The read position (page and row within the page) is kept in
 * pQInfo->groupResInfo.pos, so a page that does not fit completely is resumed
 * at the right row by the next call. On return pQuery->rec.rows holds the
 * number of copied rows.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pos.rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pos.pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->pos.rowId < pData->num);

    // first row of this page that has not been returned yet
    int32_t startRow = (int32_t)pGroupResInfo->pos.rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // the rest of the page does not fit: copy what fits and resume here next time
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->pos.rowId += numOfCopiedRows;
      done = true;
    } else {
      // only the rows that are left in the page are copied, not the whole page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pos.pageId += 1;
      pGroupResInfo->pos.rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage; skip the rows that were returned before
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + startRow * bytes;
      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2807
/*
 * Number of results held by a window result.
 *
 * The count is taken from the first output column that has a positive number
 * of results. TS, TAG and TAGPRJ columns are skipped, since they do not decide
 * the number of output rows. Returns 0 when no column qualifies.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool decidesRowCount = (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG && funcId != TSDB_FUNC_TAGPRJ);
    if (decidesRowCount) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }
  }

  return 0;
}

2830
/*
 * Merge the window results of all tables of one group, ordered by timestamp,
 * into result pages of the group.
 *
 * - No table of the group has results: returns 0.
 * - Exactly one table has results: no merge is needed, the pages of that table
 *   are used directly as the pages of the group.
 * - Otherwise the window results are merged through a loser tree; results with
 *   the same timestamp are merged into one output row, and the output buffer is
 *   flushed to the result pages whenever it is full.
 *
 * Returns the number of result pages of the group, or -1 when flushing the
 * results fails. Out of memory and a killed query abort through longjmp() on
 * pRuntimeEnv->env. All temporary allocations are released on every exit path.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    taosTFree(posList);
    taosTFree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;

      // remember the pages of a table that really has results; they are used by the
      // single-table fast path below
      pageList = list;
      tid = TSDB_TABLEID(item->pTable)->tid;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    taosTFree(posList);
    taosTFree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  char* buf = calloc(1, pRuntimeEnv->interBufSize);

  // the tree pointer stays NULL when the tree has not been created
  if (pTree == NULL || pResultInfo == NULL || buf == NULL) {
    taosTFree(pTableList);
    taosTFree(posList);
    taosTFree(pTree);
    taosTFree(pResultInfo);
    taosTFree(buf);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  int32_t code = TSDB_CODE_SUCCESS;

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pTree);
      taosTFree(pResultInfo);
      taosTFree(buf);

      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the winner of the loser tree is the source with the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window result: skip it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            code = -1;  // leave the loop, the temporary buffers are released below
            break;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  // flush the rows that are still in the output buffer
  if (code == TSDB_CODE_SUCCESS && buffer[0]->num != 0) {
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      code = -1;
    }
  }

  if (code != TSDB_CODE_SUCCESS) {
    qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
  } else {
    int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

    qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);
  }

  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);

  taosTFree(pResultInfo);
  taosTFree(buf);

  if (code != TSDB_CODE_SUCCESS) {
    return -1;
  }

  return pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3015 3016
/*
 * Write the rows held in the output buffers (pQuery->sdata) into new pages of
 * the result buffer that belong to the group of pGroupResInfo.
 *
 * The rows are split into chunks of at most numOfRowsPerPage rows; every chunk
 * goes into a new page and increases pGroupResInfo->numOfDataPages by one.
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;
  int32_t rowsPerPage = pResultBuf->numOfRowsPerPage;
  int32_t totalRows = (int32_t) pQuery->sdata[0]->num;

  // NOTE(review): the page returned by getNewDataBuf is used without a NULL check --
  // confirm that it can not fail
  for (int32_t flushed = 0; flushed < totalRows; ) {
    int32_t left = totalRows - flushed;
    int32_t rows = (left < rowsPerPage) ? left : rowsPerPage;
    assert(rows > 0);

    // get the output buffer page
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    buf->num = rows;

    // copy the chunk of every column to the position of that column in the page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = ((char *) pQuery->sdata[col]->data) + flushed * bytes;
      char *output = buf->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;
      memcpy(output, src, buf->num * bytes);
    }

    flushed += rows;
    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/*
 * Reset the merge output buffers and the function contexts that write to them.
 *
 * The output position of every context is placed one row before the start of
 * its output buffer; doMerge() advances it by one row before a new row is
 * written.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];
    tFilePage      *pPage = pQuery->sdata[col];

    pPage->num = 0;

    pc->resultInfo = &pResultInfo[col];
    pc->startOffset = 0;
    pc->size = 1;
    pc->aOutputBuf = pPage->data - pc->outputBytes;
  }
}

3064 3065 3066 3067
/*
 * Prepare the query info of one table for the scan in the opposite direction:
 * the time window is limited to the range covered so far and swapped, the
 * cursor order is switched, and the window result index is set to the last
 * window. A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the relation between win.ekey and lastKey + step:
  // win.ekey >= lastKey + step is expected for a non-ascending query,
  // win.ekey <= lastKey + step for an ascending one.

  // when lastKey still equals win.skey there are no results, and the window is kept as it is
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * Update the `complete` flag of every output column in every CLOSED window result.
 *
 * - first/first_dst with order == TSDB_ORDER_ASC, or last/last_dst with order == TSDB_ORDER_DESC:
 *   the flag is cleared (complete = false);
 * - ts and tag columns: the flag is left as it is;
 * - every other function: the flag is set (complete = true).
 *
 * Window results that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!getTimeWindowResStatus(pWindowResInfo, w)) {
      continue;  // only closed windows are touched
    }

    SWindowResult *pWindowRes = getWindowResult(pWindowResInfo, w);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirstFunc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLastFunc  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirstFunc && order == TSDB_ORDER_ASC) || (isLastFunc && order == TSDB_ORDER_DESC)) {
        pWindowRes->resultInfo[col].complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pWindowRes->resultInfo[col].complete = true;
      }
    }
  }
}

3120 3121
/*
 * Update the `complete` flag of the output functions according to the current query order.
 *
 * For group-by-normal-column and interval queries the work is delegated to
 * disableFuncInReverseScanImpl() on the runtime window result info. Otherwise the single
 * result attached to each function context is updated with the same rule:
 *   - first/first_dst with ascending order, last/last_dst with descending order: complete = false
 *   - ts and tag columns: untouched
 *   - anything else: complete = true
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // for simple result of table query (todo refactor)
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    if (pCtx->resultInfo == NULL) {
      continue; // resultInfo is NULL, means no data checked in previous scan
    }

    int32_t functId = pQuery->pSelectExpr[col].base.functionId;

    bool isFirstFunc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLastFunc  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirstFunc && order == TSDB_ORDER_ASC) || (isLastFunc && order == TSDB_ORDER_DESC)) {
      pCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Prepare every table of every table group for the reverse scan.
 *
 * For each table the per-table query info is turned around by updateTableQueryInfoForReverseScan(),
 * and the resulting lastKey is copied into the matching STableKeyInfo entry of
 * pQInfo->tableGroupInfo.pGroupList. The entry at position (i, j) of the group list is expected
 * to describe the same table as the query info at position (i, j); this is asserted before any
 * value is written.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t i = 0; i < numOfGroups; ++i) {
    SArray *group = GET_TABLEGROUP(pQInfo, i);
    SArray *tableKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    // taosArrayGetSize() yields a size_t, so the index uses the same unsigned type
    size_t numOfTables = taosArrayGetSize(group);
    for (size_t j = 0; j < numOfTables; ++j) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(group, j);
      STableKeyInfo   *pTableKeyInfo = taosArrayGet(tableKeyGroup, j);

      // both lists must refer to the same table; verify it before the key info is modified
      assert(pCheckInfo->pTable == pTableKeyInfo->pTable);

      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      pTableKeyInfo->lastKey = pCheckInfo->lastKey;
    }
  }
}

3171
/* Apply SWITCH_ORDER to the `order` field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
    ++col;
  }
}

H
Haojun Liao 已提交
3178
/*
 * Allocate the per-column result info of one window result.
 *
 * A single zero-filled allocation holds pQuery->numOfOutput SResultInfo entries followed by
 * interBufSize bytes of intermediate buffer. The position of the result is reset to {-1, -1}
 * and setWindowResultInfo() is called with the start of the intermediate buffer area.
 *
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if the allocation fails
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;

  // bytes taken by the SResultInfo array placed in front of the intermediate buffer
  size_t infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // set the intermediate result output buffer
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every function context back at the beginning of its output column, reset the
 * runtime result info attached to it, zero the output column, and finally run
 * initCtxOutputBuf() on the runtime environment.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo    *pResInfo = &pRuntimeEnv->resultInfo[col];

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(pResInfo);
    pCtx->resultInfo = pResInfo;

    // set the timestamp output buffer for top/bottom/diff query: it is the output of column 0
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool writesTs = (functionId == TSDB_FUNC_TOP) || (functionId == TSDB_FUNC_BOTTOM) || (functionId == TSDB_FUNC_DIFF);
    if (writesTs) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // clear the whole output column (bytes per row * row capacity)
    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of the function contexts forward by `output` rows and reset the
 * result info of every context.
 *
 * Only functions whose status satisfies IS_OUTER_FORWARD are advanced. The diff function must
 * not appear here (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP) || (functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and call the init routine of the
 * function for each context whose result info is not initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    // an already initialized result must not be initialized twice
    SResultInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3266
/*
 * Apply the remaining OFFSET of the query to the rows currently held in the output buffer.
 *
 * - Nothing happens when there are no rows or the offset is 0.
 * - If the offset covers all current rows, the rows are dropped, the offset is decreased by the
 *   number of dropped rows, the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 * - Otherwise the first `offset` rows of every output column are removed by moving the rest of
 *   the column to the front, the output pointers are placed right behind the remaining rows,
 *   and the offset becomes 0.
 *
 * The row counter and the offset are handled as signed 64-bit values in this function, so
 * they are logged with PRId64.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only a prefix of the current rows is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[i]->data, (char*)pQuery->sdata[i]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pRuntimeEnv->pCtx[i].aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

    // the timestamp output of diff/top/bottom follows the output position of column 0
    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update the status of the query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed into the current
 * status after the QUERY_NOT_COMPLETED bit has been cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Invoke xNextStep on every function context except the ts column and report whether at
 * least one of the results attached to those contexts is not complete afterwards.
 * All contexts are always visited; there is no early exit.
 */
static bool doNextStepForAllCtx(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    incomplete = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functId = pQuery->pSelectExpr[col].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

    if (!pResInfo->complete) {
      incomplete = true;
    }
  }

  return incomplete;
}

/*
 * Advance all output functions to their next step and decide whether the data blocks have to
 * be scanned again.
 *
 * For group-by-normal-column and interval queries every closed window result is bound to the
 * contexts first and then processed; otherwise the contexts are processed once as they are.
 *
 * @return true if any processed function result is still not complete
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    return doNextStepForAllCtx(pRuntimeEnv);
  }

  bool toContinue = false;

  // for each group result, call the next-step function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, i);
    if (!pResult->closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pResult);

    if (doNextStepForAllCtx(pRuntimeEnv)) {
      toContinue = true;
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3362
/*
 * Take a snapshot of the query state before the scan of the current table starts.
 *
 * The snapshot holds the query status, the current index of the runtime window result info,
 * `start` as last key, a copy of the query window (w) and a copy of the window of the current
 * table whose start key is replaced by `start` (curWindow). All other fields are zero.
 *
 * `start` must not be beyond the lastKey of the current table in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pTableQueryInfo->lastKey);
  } else {
    assert(start >= pTableQueryInfo->lastKey);
  }

  SQueryStatusInfo info = {0};
  info.status      = pQuery->status;
  info.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  info.lastKey     = start;

  TIME_WINDOW_COPY(info.w, pQuery->window);
  TIME_WINDOW_COPY(info.curWindow, pTableQueryInfo->win);

  info.curWindow.skey = start;

  return info;
}

3383 3384 3385 3386
/*
 * Switch the runtime environment to the reverse scan.
 *
 * Steps, in this order:
 *   1. save the ts-buffer cursor in pStatus->cur; if a ts buffer exists, switch its order and
 *      move it to the next position (which must succeed);
 *   2. use pStatus->curWindow with swapped ends as query window and switch the query order;
 *   3. mark the scan as REVERSE_SCAN, reset the query status, switch the order of the function
 *      contexts, update the `complete` flags and the per-table scan ranges;
 *   4. release the previous secondary query handle, if any, and open a new one for the
 *      reversed window.
 *
 * Does not return normally when the new handle cannot be created: longjmp(env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    bool moved = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(moved);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch the window has to be consistent with the new direction
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3432 3433
/*
 * Undo the changes made for the reverse scan: the query order and the order of the function
 * contexts are switched back, the ts-buffer cursor saved in pStatus->cur is restored, the scan
 * is marked as MASTER_SCAN again, and lastKey, status and the time windows are restored from
 * the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    // the cursor follows the restored query order
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // update the pQuery->window.skey and pQuery->window.ekey to limit the scan scope of sliding query during reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // both the table window and the query window go back to the saved query window
  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3454 3455 3456 3457 3458 3459 3460
/*
 * Set the lastKey of the only table in the group info to the start key of the given query
 * condition. The group info must contain exactly one table (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3461
/*
 * Scan the data blocks of the current table, beginning at `start`.
 *
 * The master scan is executed first. As long as needScanDataBlocksAgain() reports an incomplete
 * function result, the range covered by the master scan is scanned again (REPEAT_SCAN) through
 * a newly created secondary query handle. If needReverseScan() holds afterwards, one additional
 * scan in the opposite direction is executed and the environment is restored.
 *
 * Does not return normally (longjmp on pRuntimeEnv->env) when a query handle cannot be created
 * or when the query has been killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // only the master scan defines the range and the status that are kept in the snapshot
    if (pRuntimeEnv->scanFlag == MASTER_SCAN) {
      qstatus.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (pTableQueryInfo->lastKey != qstatus.lastKey) {
        qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    // another pass over the range of the master scan is required
    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3542
/*
 * Call the xFinalize routine of every output function.
 *
 * Plain queries: the routines are called once on the current contexts.
 * Group-by-normal-column / interval queries: for a group-by query all time windows are closed
 * first; then every closed window result is bound to the contexts, finalized, and the number
 * of result rows is stored in the window result.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    SWindowResult *pWindowRes = &pWindowResInfo->pResult[i];
    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWindowRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/* Return true if at least one output column is produced by a function other than ts, tag or tagprj. */
static bool hasMainOutput(SQuery *pQuery) {
  bool found = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !found; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    found = !auxiliary;
  }

  return found;
}

H
Haojun Liao 已提交
3590
/*
 * Initialize the STableQueryInfo stored in the caller-provided buffer `buf`.
 *
 * The window is set to `win`, lastKey to win.skey, the table handle to pTable and the vgroup
 * index of the cursor to -1. For interval and group-by-normal-column queries the window result
 * info is initialized as well; in other aggregate queries it is not initialized.
 *
 * @return `buf` as STableQueryInfo*, or NULL if the window result info cannot be initialized.
 *         The buffer is not released here.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // set more initial size of interval/groupby query
  bool needWindowRes = QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol;
  if (needWindowRes) {
    int32_t initialSize = 16;
    int32_t initialThreshold = 100;

    int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
    if (code != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3615
/*
 * Clean up the window result info embedded in the table query info. NULL is accepted.
 * The STableQueryInfo object itself is not freed here.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3626
 * @param pDataBlockInfo
3627
 */
H
Haojun Liao 已提交
3628
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3629
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3630 3631 3632
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3633 3634
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3635 3636 3637 3638

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3639

H
Haojun Liao 已提交
3640 3641 3642
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3643

3644 3645
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3646 3647 3648
  if (pWindowRes == NULL) {
    return;
  }
3649

3650 3651 3652 3653 3654
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3655
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3656 3657 3658 3659
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3660

H
Haojun Liao 已提交
3661 3662
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3663 3664 3665 3666
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3667
/*
 * Bind every function context to the given window result: the output pointer is set to the
 * position of the result inside its result buffer page, the result info is attached, and the
 * super table query flag of the result info is copied from the runtime environment.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pResPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pResPage);

    // top/bottom/diff use the output of column 0 as their timestamp output
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool writesTs = (functionId == TSDB_FUNC_TOP) || (functionId == TSDB_FUNC_BOTTOM) || (functionId == TSDB_FUNC_DIFF);
    if (writesTs) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3694 3695
/*
 * Bind the function contexts to the given window result and initialize the results that have
 * not been initialized yet.
 *
 * The result info is attached to every context. A context whose result is initialized and
 * complete receives nothing else; for the others the output pointer, the stage, the timestamp
 * output (top/bottom/diff) and the super table query flag are set as well.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pResPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo    *pResInfo = &pResult->resultInfo[col];

    pCtx->resultInfo = pResInfo;
    if (pResInfo->initialized && pResInfo->complete) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pResPage);
    pCtx->currentStage = 0;

    // top/bottom/diff use the output of column 0 as their timestamp output
    int32_t functionId = pCtx->functionId;
    bool writesTs = (functionId == TSDB_FUNC_TOP) || (functionId == TSDB_FUNC_BOTTOM) || (functionId == TSDB_FUNC_DIFF);
    if (writesTs) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;     // set super table query flag

    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3728
/*
 * Set the tag values of the given table and, if a ts buffer exists, position the ts buffer for
 * this table.
 *
 * When the table has no saved cursor yet (vgroupIndex == -1), the tag of the first function
 * context is copied into the table query info, the start position of that tag is looked up in
 * the ts buffer and the resulting cursor is saved. Otherwise the saved cursor is restored.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  tVariantAssign(&pTableQueryInfo->tag, &pRuntimeEnv->pCtx[0].tag);
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pTableQueryInfo->tag);

  // keep the cursor info of current meter
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3758
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3759 3760
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3761
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3762

3763 3764 3765
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3766
    pTableQueryInfo->win.skey = key;
3767
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3768

3769 3770 3771 3772 3773
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3774

3775 3776 3777 3778 3779 3780
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3781
    STimeWindow     w = TSWINDOW_INITIALIZER;
3782
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3783

H
Haojun Liao 已提交
3784 3785
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3786
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3787
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3788

3789 3790
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3791
        assert(win.ekey == pQuery->window.ekey);
3792
      }
3793

3794
      pWindowResInfo->prevSKey = w.skey;
3795
    }
3796

3797
    pTableQueryInfo->queryRangeSet = 1;
3798
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3799 3800 3801 3802
  }
}

/* Return true if the status of at least one output function has the TSDB_FUNCSTATE_NEED_TS bit set. */
bool requireTimestamp(SQuery *pQuery) {
  bool needTs = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !needTs; col++) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    needTs = ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
  }

  return needTs;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * 1. if the lastKey of the current table or the end key of the query window locates in this
 *    block, the timestamp column is needed to decide the precise position
 * 2. if any output function requires the timestamp (see requireTimestamp()), the timestamp
 *    column is needed in any case
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo* pCurrent = pQuery->current;

  // the current scan position is inside the block
  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  // the end of the query window is inside the block
  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3826
/*
 * Copy rows of the closed window results in pResultInfo into the query output
 * buffer (pQuery->sdata), resuming from the position recorded in
 * pQInfo->groupIndex and pQInfo->groupResInfo.pos.rowId.
 *
 * Windows are visited by increasing index when orderType is TSDB_ORDER_ASC,
 * otherwise from the last closed window backwards. Copying stops as soon as
 * pQuery->rec.capacity rows have been produced; a window that was only partly
 * copied keeps its row offset so that a later call continues from there.
 *
 * Returns the number of rows copied by this call.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;
  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;

  int32_t cur = 0;
  int32_t delta = -1;
  if (orderType == TSDB_ORDER_ASC) {
    cur = pQInfo->groupIndex;
    delta = 1;
  } else {  // descending order: start from the tail of the closed windows
    cur = numOfClosed - pQInfo->groupIndex - 1;
  }

  int32_t copied = 0;
  for (; (cur < numOfClosed) && (cur >= 0); cur += delta) {
    SWindowResult *pWin = &pWindows[cur];

    // empty window: nothing to copy, move on to the next one
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->pos.rowId = 0;
      continue;
    }

    int32_t rows = pWin->numOfRows - pGroupResInfo->pos.rowId;
    int32_t srcRow = pGroupResInfo->pos.rowId;

    if (rows > pQuery->rec.capacity - copied) {
      // the output buffer cannot hold the rest of this window: copy what fits
      // and remember where to resume
      rows = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->pos.rowId += rows;
    } else {
      // the window is fully consumed
      pGroupResInfo->pos.rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *pPage = getResBufPage(pEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pEnv, col, pWin, pPage);
      memcpy(dst, src + srcRow * width, width * rows);
    }

    copied += rows;
    if (copied == pQuery->rec.capacity) {
      break;  // output buffer is full
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
3903
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
3904
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
3905

3906
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
3907
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
3908

3909
  pQuery->rec.rows += numOfResult;
3910

3911
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
3912 3913
}

H
Haojun Liao 已提交
3914
/*
 * For non-interval queries, raise numOfRows of every window result in
 * pRuntimeEnv->windowResInfo to the largest numOfRes reported by its output
 * columns. Columns produced by TSDB_FUNC_TS, TSDB_FUNC_TAG and
 * TSDB_FUNC_TAGPRJ are not taken into account. Interval queries are left
 * untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t w = 0; w < pRuntimeEnv->windowResInfo.size; ++w) {
    SWindowResult *pWin = &pRuntimeEnv->windowResInfo.pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;
      bool    skipped = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

      if (!skipped) {
        pWin->numOfRows = MAX(pWin->numOfRows, pWin->resultInfo[col].numOfRes);
      }
    }
  }
}

H
Haojun Liao 已提交
3936
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
3937
    SArray *pDataBlock, __block_search_fn_t searchFn) {
3938
  SQuery *         pQuery = pRuntimeEnv->pQuery;
3939
  STableQueryInfo* pTableQueryInfo = pQuery->current;
3940

3941
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3942
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3943

H
Haojun Liao 已提交
3944
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
3945
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3946
  } else {
3947
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3948 3949 3950
  }
}

H
Haojun Liao 已提交
3951
/*
 * Tell whether a table query still has results that were not handed out yet.
 *
 * - Once the limit is reached (limit > 0 and rec.total >= limit) the answer is
 *   always false.
 * - Fill queries (fill type set and not a point interpolation query): true if
 *   the fill info still holds rows; otherwise, when the query is completed,
 *   true if filling up to the end of the query window yields any row.
 * - Other queries: true only for a completed group-by-normal-column or
 *   interval query whose window result info is not empty.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillApplies = (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery));
  if (!fillApplies) {
    // there are results waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // Results not returned to the client yet must be filled first.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * No buffered rows remain at this point. While the query is not completed,
   * the gaps between two result blocks are handled after the next data block
   * has been retrieved from TSDB, so nothing is pending here.
   *
   * NOTE: If the result set is not the first block, the gap in front of the
   * result set will be filled. If the result set is the FIRST result block,
   * the gap between the start time of the query time window and the timestamp
   * of the first result row in the actual result set is not filled.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response buffer `data`.
 *
 * Layout written, in this order:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes each;
 *   2. the number of entries of pQInfo->arrTableIdInfo (int32, htonl);
 *   3. one STableIdInfo per entry with uid/key converted by htobe64 and tid by
 *      htonl.
 *
 * Afterwards, if the query is completed, its status is switched to QUERY_OVER
 * when nothing is left to return (super table queries and table queries use
 * different checks).
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  char   *cursor = data;

  // column data, one column after another
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t colLen = pQuery->pSelectExpr[col].bytes * numOfRows;

    memmove(cursor, pQuery->sdata[col]->data, colLen);
    cursor += colLen;
  }

  // table id list
  // NOTE(review): the stores below go through casted pointers into the byte
  // buffer; this presumably relies on the target tolerating unaligned access
  // -- confirm.
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)cursor = htonl(numOfTables);
  cursor += sizeof(int32_t);

  for (int32_t t = 0; t < numOfTables; ++t, cursor += sizeof(STableIdInfo)) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, t);
    STableIdInfo* pDst = (STableIdInfo*)cursor;

    pDst->uid = htobe64(pSrc->uid);
    pDst->tid = htonl(pSrc->tid);
    pDst->key = htobe64(pSrc->key);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  if (pQInfo->runtimeEnv.stableQuery) {
    if (IS_STASBLE_QUERY_OVER(pQInfo)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

H
Haojun Liao 已提交
4028
/*
 * Generate filled result rows into pQuery->sdata and apply the remaining
 * pQuery->limit.offset to them.
 *
 * Each round produces up to pQuery->rec.capacity rows:
 *  - offset == 0: the rows are returned as they are;
 *  - offset < rows: the first `offset` rows are dropped, the rest is moved to
 *    the beginning of the pDst pages and returned, and the offset becomes 0;
 *  - otherwise: the whole batch is discarded, the offset is reduced by the
 *    batch size, and another round follows as long as
 *    queryHasRemainResForTableQuery() reports pending results.
 *
 * Returns the number of rows available to the caller (0 if everything was
 * consumed by the offset). The numOfFilled parameter is not used here.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      // only the head of this batch is swallowed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= (int32_t)pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // the entire batch is swallowed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return 0;
    }
  }
}

4073
/*
 * Add the first-stage merge time to the elapsed time of the query and write
 * the cost summary to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // add the merge time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%" PRId64 " us, first merge:%" PRId64 " us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%" PRId64 ", check rows:%" PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: internal size:%" PRId64 "B, numOfWin:%" PRId64, pQInfo, pCost->internalSupSize,
         pCost->numOfTimeWindows);
}

4089 4090
/*
 * Consume the remaining pQuery->limit.offset inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is
 * skipped: lastKey is moved one step past the block and nothing is computed.
 * Otherwise pQuery->pos is set to the first row after the skipped ones (from
 * the front for ascending queries, from the back otherwise), lastKey is set
 * to the timestamp of that row, and the query functions are applied to the
 * block. In both cases the offset is reset to 0.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset swallows exactly this block: skip it entirely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCurrent->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pCurrent->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  // position of the first row that survives the offset
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);  // column 0 is read as the timestamp column
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // the scan continues from the timestamp at the new position
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4124

4125 4126 4127 4128 4129
/*
 * Skip data blocks according to pQuery->limit.offset before the real scan.
 *
 * Nothing is done when there is no positive offset or when the query has
 * filter columns. Otherwise whole blocks are dropped while the offset is
 * larger than the block's row count; the first block that the offset does not
 * exceed is handed to updateOffsetVal() and the loop ends.
 *
 * Leaves via longjmp(pRuntimeEnv->env, ...) when the query was killed or when
 * terrno is set after the loop.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    if (!(pQuery->limit.offset > blockInfo.rows)) {
      // the start position lies in (or exactly at the end of) this block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the complete block is skipped
    pQuery->limit.offset -= blockInfo.rows;
    pCurrent->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey += step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4164

H
Haojun Liao 已提交
4165
/*
 * Consume pQuery->limit.offset for an interval query by skipping whole time
 * windows before the scan proper starts.
 *
 * *start is initialised with the current table's lastKey and is overwritten
 * with the new start timestamp once the offset has been used up. Nothing is
 * skipped when there is no positive offset, or when the query has filter
 * columns, a timestamp buffer or fill info.
 *
 * For interval queries without interpolation one window is skipped at a time,
 * pQuery->limit.offset times; since holes may exist in the data,
 * intervalTime * offset cannot be used to jump directly.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) if terrno is set after the
 * scan loop. Every regular path returns true.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow aligned = TSWINDOW_INITIALIZER;

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // the current window ends inside the scanned range: it counts as skipped
      if ((win.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (win.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = win.skey;
      }

      STimeWindow next = win;
      GET_NEXT_TIMEWINDOW(pQuery, &next);

      // does the following window still touch the current data block?
      bool nextInBlock = (next.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
                         (next.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery));

      if (!nextInBlock) {
        if (pQuery->limit.offset == 0) {
          // offset used up and the next window starts in another block
          *start = next.skey;
          pQuery->window.skey = next.skey;
          pWinInfo->prevSKey = next.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still starts from the current data block: load
       * the block and find the start position of the next qualified window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be
       * loaded, only if the first actually required time window resides in
       * current data block.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

      next = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (pQuery->limit.offset == 0) {
        // reset the query start timestamp
        pCurrent->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pWinInfo->prevSKey = next.skey;
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

        // check the data remaining in the current data block
        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      // more windows to skip: continue from the located window
      pCurrent->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
      pWinInfo->prevSKey = next.skey;
      win = next;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

B
Bomin Zhang 已提交
4291
/*
 * Create the TSDB query handle of the runtime environment.
 *
 * No handle is created (TSDB_CODE_SUCCESS is returned right away) when the
 * query only reads tags, or when it is a super table query that is neither an
 * interval query nor a fixed-output query.
 *
 * The query condition is built from the order, column list and time window of
 * pQuery. For an ascending, non-interval, non-group-by-column,
 * non-fixed-output query on exactly one table, the time window of that
 * table's query info is used instead.
 *
 * Returns terrno as left by the tsdbQuery* call (reset to TSDB_CODE_SUCCESS
 * before the call).
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery) ||
      (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv)))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  bool useTableWindow = !isSTableQuery
                        && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
                        && (cond.order == TSDB_ORDER_ASC)
                        && (!QUERY_IS_INTERVAL_QUERY(pQuery))
                        && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
                        && (!isFixedOutputQuery(pRuntimeEnv));
  if (useTableWindow) {
    SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

    // update the query time window
    pQuery->window = cond.twindow;

    // propagate the updated window to every table of every group
    size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
    for (int32_t g = 0; g < numOfGroups; ++g) {
      SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

      size_t numOfTables = taosArrayGetSize(pGroup);
      for (int32_t t = 0; t < numOfTables; ++t) {
        STableQueryInfo *pInfo = taosArrayGetP(pGroup, t);

        pInfo->win = pQuery->window;
        pInfo->lastKey = pInfo->win.skey;
      }
    }
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }

  return terrno;
}

4351 4352 4353
/*
 * Build the fill column descriptors for the output columns of the query.
 *
 * One SFillColInfo is created per output expression; the column offsets are
 * the running sum of the byte widths of the preceding expressions.
 *
 * Returns a calloc'ed array of pQuery->numOfOutput entries, or NULL if the
 * allocation fails.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t offset = 0;
  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo    *pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo *pCol = &pFillCol[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = offset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    offset += pExpr->bytes;
  }

  return pFillCol;
}

4376
/*
 * Initialise the runtime environment of a query.
 *
 * Steps, in order:
 *  1. derive per-query flags (precision, top/bottom, tag output) and adjust
 *     the scan order;
 *  2. create the TSDB query handle (setupQueryHandle);
 *  3. record tsdb/vgId and the timestamp buffer, and set its traverse order;
 *  4. create the runtime environment (setupQueryRuntimeEnv);
 *  5. create the disk based result buffer and the window result info for
 *     super table queries, group-by-column queries and interval queries;
 *  6. create the fill info for fill queries that are not point interpolation
 *     queries.
 *
 * @param pQInfo         query handle to initialise
 * @param pTsBuf         timestamp buffer, may be NULL
 * @param tsdb           TSDB handle the query runs against
 * @param vgId           id stored in pQInfo->vgId
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS, the error code of the failing step, or
 *         TSDB_CODE_QRY_OUT_OF_MEMORY if the fill column descriptors cannot
 *         be allocated.
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  // TODO fixme
  changeExecuteScanOrder(pQInfo, false);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forwards when it is stored in the query order, backwards otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;  // in-memory size passed to the result buffer

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    // NOTE(review): same call as above with the same arguments; looks redundant -- confirm before removing
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    if (pColInfo == NULL) {
      // taosCreateFillColInfo() returns NULL when its allocation fails; do not
      // hand a NULL descriptor array to the fill module
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4487
/*
 * Clear the `complete` flag of the result info of every output column, so the
 * function contexts can be used for the next table. Columns without a result
 * info are skipped.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514
/*
 * Prepare the execution environment for one data block of a table.
 *
 * Interval queries: the interval query range is set from the start key of the
 * block, and the additional table info is set when the query outputs tag
 * values or uses a timestamp buffer.
 * Other queries: the execution context is set for the table's group, using
 * the block's end key moved by one step in scan direction.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pQInfo->runtimeEnv.pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    TSKEY blockStart = pBlockInfo->window.skey;
    setIntervalQueryRange(pQInfo, blockStart);

    if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4515
/*
 * Scan every data block delivered by the active query handle and apply the query
 * functions to it.
 *
 * The primary handle is used during the master scan, the secondary handle otherwise.
 * For each block the owning table's query info is looked up by table id; the scan
 * stops if no entry is found. Blocks reported as BLK_DATA_DISCARD only advance the
 * table's lastKey past the block.
 *
 * Long-jumps to pRuntimeEnv->env when the query has been killed, or when terrno is
 * set once the block iteration has ended.
 *
 * @param pQInfo query information
 * @return elapsed time of the scan, in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT pHandle = NULL;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  } else {
    pHandle = pRuntimeEnv->pSecQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    STableQueryInfo **ppTableInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    STableQueryInfo *pTableInfo = *ppTableInfo;
    pQuery->current = pTableInfo;

    // the table's window must follow the scan direction and lie inside the query window
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert((pTableInfo->win.skey <= pTableInfo->win.ekey) && (pTableInfo->lastKey >= pTableInfo->win.skey) &&
             (pTableInfo->win.skey >= pQuery->window.skey && pTableInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert((pTableInfo->win.skey >= pTableInfo->win.ekey) && (pTableInfo->lastKey <= pTableInfo->win.skey) &&
             (pTableInfo->win.skey <= pQuery->window.skey && pTableInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pTableInfo, &blockInfo);
    }

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t loadStatus = loadDataBlockOnDemand(pRuntimeEnv, pHandle, &blockInfo, &pStatis, &pDataBlock);
    if (loadStatus == BLK_DATA_DISCARD) {
      // the block is skipped: move lastKey one step beyond the block edge in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  // an error raised while iterating the blocks aborts the query
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startMs;
}

4583 4584
/*
 * Set up the runtime environment to query one single table of the first table group.
 *
 * The query status is reset to QUERY_NOT_COMPLETED, the tag values are set when needed,
 * any existing primary query handle is released and a new one is created that covers
 * only the selected table, starting from the table's lastKey. Long-jumps to
 * pRuntimeEnv->env with terrno if the new handle cannot be created.
 *
 * @param pQInfo query information
 * @param index  position of the table inside table group 0
 * @return false when a TS buffer is attached, its cursor has not been positioned yet
 *         (vgroupIndex == -1) and no element is found for the current tag; true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *pTables = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pCheckInfo = taosArrayGetP(pTables, index);

  bool needTagVal = pRuntimeEnv->hasTagResults || (pRuntimeEnv->pTSBuf != NULL);
  if (needTagVal) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }

  STableId *pId = TSDB_TABLEID(pCheckInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pId->uid, pId->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);

  // the scan resumes from the last key reached on this table
  STsdbQueryCond cond = {
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list holding one group with the current table only
  SArray *pGroupList = taosArrayInit(1, POINTER_BYTES);
  SArray *pTableList = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo keyInfo = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(pTableList, &keyInfo);
  taosArrayPush(pGroupList, &pTableList);

  STableGroupInfo groupInfo = {.numOfTables = 1, .pGroupList = pGroupList};

  // drop the handle of the previously queried table
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &groupInfo, pQInfo);

  // the temporary lists are released before the handle is checked
  taosArrayDestroy(pTableList);
  taosArrayDestroy(pGroupList);

  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pRuntimeEnv->pCtx[0].tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4653
static void sequentialTableProcess(SQInfo *pQInfo) {
4654
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4655
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4656
  setQueryStatus(pQuery, QUERY_COMPLETED);
4657

H
Haojun Liao 已提交
4658
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4659

H
Haojun Liao 已提交
4660
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4661 4662
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4663

4664
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4665
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4666

S
TD-1057  
Shengliang Guan 已提交
4667
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4668
             numOfGroups, group);
H
Haojun Liao 已提交
4669 4670 4671 4672 4673 4674 4675

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4676 4677
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4678 4679 4680
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4681

H
Haojun Liao 已提交
4682 4683 4684 4685 4686 4687 4688
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4689

4690
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4691
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4692
      } else {
H
Haojun Liao 已提交
4693
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4694
      }
B
Bomin Zhang 已提交
4695 4696 4697 4698 4699 4700

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4701

H
Haojun Liao 已提交
4702
      initCtxOutputBuf(pRuntimeEnv);
4703

4704
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4705
      assert(taosArrayGetSize(s) >= 1);
4706

4707
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4708 4709 4710
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4711

dengyihao's avatar
dengyihao 已提交
4712
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4713

H
Haojun Liao 已提交
4714
      // here we simply set the first table as current table
4715 4716 4717
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4718
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4719

H
Haojun Liao 已提交
4720 4721 4722 4723 4724
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4725

H
Haojun Liao 已提交
4726 4727 4728 4729 4730
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4731 4732 4733 4734 4735 4736

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4737
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4738
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4739
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4740

S
TD-1057  
Shengliang Guan 已提交
4741
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4742 4743 4744 4745 4746 4747 4748

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4749 4750
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4763
      // no need to update the lastkey for each table
4764
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4765

B
Bomin Zhang 已提交
4766 4767
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4768 4769 4770
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4771

4772
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4773 4774
      assert(taosArrayGetSize(s) >= 1);

4775
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4776 4777 4778 4779 4780 4781 4782 4783

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
4784
      taosArrayDestroy(s);
4785 4786 4787 4788 4789
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
4790
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
4791 4792 4793 4794 4795 4796 4797

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4798
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4799 4800 4801 4802 4803 4804 4805
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4806
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4807 4808 4809 4810 4811 4812

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4813 4814 4815
    }
  } else {
    /*
4816
     * 1. super table projection query, 2. ts-comp query
4817 4818 4819
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4820
    if (pQInfo->groupIndex > 0) {
4821
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4822
      pQuery->rec.total += pQuery->rec.rows;
4823

4824
      if (pQuery->rec.rows > 0) {
4825 4826 4827
        return;
      }
    }
4828

4829
    // all data have returned already
4830
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4831 4832
      return;
    }
4833

4834 4835
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4836

H
Haojun Liao 已提交
4837
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4838 4839
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4840

4841
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4842
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4843
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4844
      }
4845

4846
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4847
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4848
        pQInfo->tableIndex++;
4849 4850
        continue;
      }
4851

H
hjxilinx 已提交
4852
      // TODO handle the limit offset problem
4853
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4854 4855
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4856 4857 4858
          continue;
        }
      }
4859

4860
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4861
      skipResults(pRuntimeEnv);
4862

4863
      // the limitation of output result is reached, set the query completed
4864
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
4865
        SET_STABLE_QUERY_OVER(pQInfo);
4866 4867
        break;
      }
4868

4869 4870
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4871

4872
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4873 4874 4875 4876 4877 4878
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4879
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4880

H
Haojun Liao 已提交
4881
        STableIdInfo tidInfo = {0};
4882

H
Haojun Liao 已提交
4883 4884 4885
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4886
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4887 4888
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4889
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
4890
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
4891 4892
          break;
        }
4893

4894
      } else {
4895
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4896 4897
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4898 4899
          continue;
        } else {
4900 4901 4902
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4903 4904 4905
        }
      }
    }
H
Haojun Liao 已提交
4906

4907
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4908 4909
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4910
  }
4911

4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4924
    finalizeQueryResult(pRuntimeEnv);
4925
  }
4926

4927 4928 4929
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4930

4931
  qDebug(
S
TD-1057  
Shengliang Guan 已提交
4932
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%" PRIzu ", %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4933
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4934
      pQuery->limit.offset);
4935 4936
}

4937 4938 4939 4940
/*
 * Switch the runtime environment from the master scan to the reverse scan.
 *
 * The scan flag is set to reverse, the query window boundaries are swapped and the query
 * order (as well as the order of the TS buffer cursor, if any) is switched. A new
 * secondary query handle is then created over all table groups for the swapped window.
 *
 * Long-jumps to pRuntimeEnv->env with terrno if the secondary handle cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // the condition is built after the swap, hence it describes the reverse scan
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer so that no stale handle stays reachable
  // while the helpers below are running
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

4974 4975 4976 4977
/*
 * Switch the runtime environment back to the master scan after a reverse scan:
 * the query window boundaries are swapped back, the order of the TS buffer cursor
 * (if any) and of the function contexts is switched, and the master scan flag is set.
 *
 * NOTE(review): doSaveContext also switches pQuery->order.order, which is not switched
 * back here -- confirm whether this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;
  STSBuf           *pTSBuf = pEnv->pTSBuf;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pTSBuf != NULL) {
    SWITCH_ORDER(pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4988 4989 4990
/*
 * Close all time windows once a scan has been completed.
 *
 * For an interval query the windows of every table in every table group are closed;
 * otherwise only the windows of the runtime environment (the group results) are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(pTables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables: a forward (master) scan, an optional reverse
 * scan, and finally the transfer of the results into the output buffer.
 *
 * When pQInfo->groupIndex is larger than 0 no scan is performed; the remaining results
 * are only copied into the output buffer.
 *
 * After each scan, the query is aborted through a long jump to pRuntimeEnv->env (code
 * TSDB_CODE_TSC_QUERY_CANCELLED) if pQInfo->code reports an error or the query has been
 * killed; finalizeQueryResult is invoked before the jump.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * groupIndex > 0: per the original author, the query process has been completed
     * already, and only the data need to be copied into the output buffer
     */
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // forward scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // same abort check as after the forward scan
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !isSumAvgRateQuery(pQuery)) {
    // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5087
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5088
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5089

H
hjxilinx 已提交
5090
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5091
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5092 5093
    return;
  }
5094

H
hjxilinx 已提交
5095
  pQuery->current = pTableInfo;  // set current query table info
5096

5097
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5098
  finalizeQueryResult(pRuntimeEnv);
5099

H
Haojun Liao 已提交
5100
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5101 5102
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5103
  }
5104

H
Haojun Liao 已提交
5105
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5106
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5107

5108
  skipResults(pRuntimeEnv);
5109
  limitResults(pRuntimeEnv);
5110 5111
}

H
hjxilinx 已提交
5112
/*
 * Handle a single-table query that may produce multiple output rows.
 *
 * The table is scanned repeatedly until at least one row is generated or the query is
 * completed. When the query is completed, the table id and the last key reached are
 * appended to pQInfo->arrTableIdInfo.
 *
 * @param pQInfo     query information
 * @param pTableInfo query info of the table to be processed; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    // the offset is still not consumed after skipping the blocks: nothing to return
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    /*
     * Zero-initialize the record before it is copied into the array, in the same way as
     * sequentialTableProcess does, so that no indeterminate bytes of the local variable
     * are stored along with the three fields assigned below.
     */
    STableIdInfo tidInfo = {0};
    STableId* id = TSDB_TABLEID(pQuery->current->pTable);

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5172
/*
 * Scan one table for an interval query until the query is completed or the result
 * buffer is full.
 *
 * After every scan the results are finalized. When no fill is requested and an offset
 * remains while filter columns or a TS buffer are present, closed time windows are
 * dropped from the front and the offset is reduced accordingly.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       key passed to scanOneTableDataBlocks on every iteration
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool filtered = (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    bool hasOffset = pQuery->limit.offset > 0;
    bool noFill = (pQuery->fillType == TSDB_FILL_NONE);

    if (filtered && hasOffset && noFill) {
      // maxOutput <= 0, means current query does not generate any results
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // consume the offset with as many closed windows as available
      int32_t numToSkip = (int32_t)(MIN(numOfClosed, pQuery->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, numToSkip);
      pQuery->limit.offset -= numToSkip;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5199
// handle time interval query on table
H
hjxilinx 已提交
5200
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5201 5202
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5203 5204
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5205

H
Haojun Liao 已提交
5206
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5207
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5208

5209
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5210
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5211
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5212 5213 5214 5215
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5216
  while (1) {
H
Haojun Liao 已提交
5217
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5218

H
Haojun Liao 已提交
5219
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5220
      pQInfo->groupIndex = 0;  // always start from 0
5221
      pQuery->rec.rows = 0;
5222
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5223

5224
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5225
    }
5226

5227
    // the offset is handled at prepare stage if no interpolation involved
5228
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5229
      limitResults(pRuntimeEnv);
5230 5231
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5232
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5233
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5234
      numOfFilled = 0;
5235

H
Haojun Liao 已提交
5236
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5237
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5238
        limitResults(pRuntimeEnv);
5239 5240
        break;
      }
5241

5242
      // no result generated yet, continue retrieve data
5243
      pQuery->rec.rows = 0;
5244 5245
    }
  }
5246

5247
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5248
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5249
    pQInfo->groupIndex = 0;
5250
    pQuery->rec.rows = 0;
5251
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5252
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5253 5254 5255
  }
}

5256 5257 5258 5259
/**
 * Run one retrieval round of a query that targets exactly one table.
 *
 * When queryHasRemainResForTableQuery() reports pending output, that output is
 * served first: for fill queries by another doFillGapsInResults() pass, otherwise
 * by copying buffered window results. Only if nothing was served does the
 * function scan again, dispatching to the interval, fixed-output or multi-output
 * processor, and add the time spent to the runtime summary.
 *
 * @param pQInfo query handle; its table group info must hold exactly one table
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResForTableQuery(pRuntimeEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      // Rows are still pending because of result interpolation, so keep filling
      // here instead of launching the retrieve procedure for the next results.
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
      if (pQuery->rec.rows > 0) {
        limitResults(pRuntimeEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    // no fill involved: drain the buffered window results, always starting from group 0
    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;

    if (pRuntimeEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        if (pRuntimeEnv->windowResInfo.size <= 0) {  // no window results remain
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // nothing was served from buffered results: start a fresh scan
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableQueryInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window and interval queries all go through the interval processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    tableIntervalProcess(pQInfo, pTableQueryInfo);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, pTableQueryInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQueryInfo);
  }

  // account for the time spent in this round
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5321
/**
 * Run one retrieval round of a super-table query.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation
 * nor group-by-normal-column queries, go to multiTableQueryProcess(); everything
 * else goes to sequentialTableProcess(). The elapsed time of the round is added
 * to the runtime summary.
 *
 * @param pQInfo query handle
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  bool multiTable =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) && (!pRuntimeEnv->groupbyNormalCol));

  if (!multiTable) {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // account for the time spent in this round
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5342
/**
 * Locate the source column referenced by an expression.
 *
 * @param pQueryMsg query message; supplies numOfTags, numOfCols and colList
 * @param pExprMsg  expression whose colInfo (flag, colId) is looked up
 * @param pTagCols  tag column array, searched when the expression refers to a tag
 * @return for a tag column: TSDB_TBNAME_COLUMN_INDEX if colId equals it, otherwise the
 *         matching position in pTagCols; for a user-defined column: TSDB_UD_COLUMN_INDEX;
 *         for any other column: the matching position in pQueryMsg->colList.
 *         If no column matches, the function asserts; in builds where assert() is
 *         compiled out it returns INT32_MIN.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);

  // Reached only when assert() is disabled. Falling off the end of a non-void function
  // whose result is used is undefined behavior, so return a defined "not found" value.
  // NOTE(review): INT32_MIN is presumably distinct from the TSDB_*_COLUMN_INDEX sentinels - confirm.
  return INT32_MIN;
}

5372 5373 5374
/**
 * Check that the column index resolved for an expression lies below the number of
 * normal columns or below the number of tag columns of the query message.
 *
 * @return true if the index returned by getColumnIndexInSource() is smaller than
 *         numOfCols or smaller than numOfTags
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);
  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5377
/**
 * Sanity-check the scalar header fields of a query message.
 *
 * Rejects, logging the first offending field: a negative intervalTime, a
 * non-positive numOfTables, a negative numOfGroupCols, or a numOfOutput outside
 * 1..TSDB_MAX_COLUMNS.
 *
 * @return true when all checked fields are in range
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/**
 * Validate the number of source columns (normal columns plus tags) of a query.
 *
 * The counts must be non-negative and must not exceed TSDB_MAX_COLUMNS in total.
 * A query with no source column at all is accepted only if every output
 * expression is TSDB_FUNC_TAGPRJ, or is TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT
 * applied to TSDB_TBNAME_COLUMN_INDEX.
 *
 * @param pQueryMsg query message with numOfCols, numOfTags and numOfOutput already converted
 * @param pExprMsg  array of numOfOutput expression pointers
 * @return true if the column counts are acceptable
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  // Range-check each count on its own before adding them: both values come from the
  // message, and adding unchecked values could overflow and slip past the limit test.
  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 ||
      pQueryMsg->numOfCols > TSDB_MAX_COLUMNS || pQueryMsg->numOfTags > TSDB_MAX_COLUMNS ||
      pQueryMsg->numOfCols + pQueryMsg->numOfTags > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (pQueryMsg->numOfCols + pQueryMsg->numOfTags != 0) {
    return true;
  }

  // no source column: only expressions that need none are allowed
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
      continue;
    }

    return false;
  }

  return true;
}

5423
/**
 * Build the table id list from the STableIdInfo records stored at pMsg.
 *
 * Each record is byte-swapped in place (tid with htonl, uid and key with htobe64)
 * and then pushed into a newly created array.
 *
 * @param pQueryMsg    query message; numOfTables must be positive
 * @param pMsg         start of the STableIdInfo records
 * @param pTableIdList receives the new array
 * @return pointer just past the last record consumed
 *
 * NOTE(review): the result of taosArrayInit() is not checked here - confirm how
 * allocation failure is expected to be handled.
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  char *cursor = pMsg;
  for (int32_t idx = 0; idx < pQueryMsg->numOfTables; ++idx) {
    STableIdInfo *pInfo = (STableIdInfo *)cursor;

    pInfo->tid = htonl(pInfo->tid);
    pInfo->uid = htobe64(pInfo->uid);
    pInfo->key = htobe64(pInfo->key);

    taosArrayPush(*pTableIdList, pInfo);
    cursor += sizeof(STableIdInfo);
  }

  return cursor;
}
5441

5442
/**
H
hjxilinx 已提交
5443
 * pQueryMsg->head has been converted before this function is called.
5444
 *
H
hjxilinx 已提交
5445
 * @param pQueryMsg
5446 5447 5448 5449
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5450
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5451
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5452 5453
  int32_t code = TSDB_CODE_SUCCESS;

5454 5455 5456 5457 5458 5459 5460 5461
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5462

5463 5464
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5465
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5466
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5467 5468

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5469
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5470
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5471 5472 5473
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5474
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5475
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5476
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5477

5478
  // query msg safety check
5479
  if (!validateQueryMsg(pQueryMsg)) {
5480 5481
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5482 5483
  }

H
hjxilinx 已提交
5484 5485
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5486 5487
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5488
    pColInfo->colId = htons(pColInfo->colId);
5489
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5490 5491
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5492

H
hjxilinx 已提交
5493
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5494

H
hjxilinx 已提交
5495
    int32_t numOfFilters = pColInfo->numOfFilters;
5496
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5497
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5498 5499 5500 5501
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5502 5503 5504
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5505
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5506

5507 5508
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5509 5510 5511

      pMsg += sizeof(SColumnFilterInfo);

5512 5513
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5514

5515
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5516 5517 5518 5519 5520
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5521
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5522
        pMsg += (pColFilter->len + 1);
5523
      } else {
5524 5525
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5526 5527
      }

5528 5529
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5530 5531 5532
    }
  }

5533
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5534 5535 5536 5537 5538
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5539
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5540

5541
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5542
    (*pExpr)[i] = pExprMsg;
5543

5544
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5545 5546 5547 5548
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5549

5550
    pMsg += sizeof(SSqlFuncMsg);
5551 5552

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5553
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5554 5555 5556 5557
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5558
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5559 5560 5561 5562 5563
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5564 5565
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5566
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5567 5568
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5569 5570
      }
    } else {
5571
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5572
//        return TSDB_CODE_QRY_INVALID_MSG;
5573
//      }
5574 5575
    }

5576
    pExprMsg = (SSqlFuncMsg *)pMsg;
5577
  }
5578

5579
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5580
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5581
    goto _cleanup;
5582
  }
5583

H
hjxilinx 已提交
5584
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5585

H
hjxilinx 已提交
5586
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5587
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5588 5589 5590 5591
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5592 5593 5594

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5595
      pMsg += sizeof((*groupbyCols)[i].colId);
5596 5597

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5598 5599
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5600
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5601 5602 5603 5604 5605
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5606

H
hjxilinx 已提交
5607 5608
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5609 5610
  }

5611 5612
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5613
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5614 5615

    int64_t *v = (int64_t *)pMsg;
5616
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5617 5618
      v[i] = htobe64(v[i]);
    }
5619

5620
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5621
  }
5622

5623 5624
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5625 5626 5627 5628 5629
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5630 5631
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5632

5633 5634 5635 5636
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5637

5638
      (*tagCols)[i] = *pTagCol;
5639
      pMsg += sizeof(SColumnInfo);
5640
    }
H
hjxilinx 已提交
5641
  }
5642

5643 5644 5645
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5646 5647 5648 5649 5650 5651

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5652 5653 5654
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5655

weixin_48148422's avatar
weixin_48148422 已提交
5656
  if (*pMsg != 0) {
5657
    size_t len = strlen(pMsg) + 1;
5658

5659
    *tbnameCond = malloc(len);
5660 5661 5662 5663 5664
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5665
    strcpy(*tbnameCond, pMsg);
5666
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5667
  }
5668

5669
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5670 5671
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5672
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5673
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5674 5675

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5676 5677

_cleanup:
S
Shengliang Guan 已提交
5678
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5679 5680
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5681 5682 5683 5684
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5685 5686

  return code;
5687 5688
}

H
hjxilinx 已提交
5689
/**
 * Build the expression tree of an arithmetic output column.
 *
 * The serialized expression is read from the first argument of pArithExprInfo->base
 * and handed to exprTreeFromBinary() inside a TRY/CATCH block.
 *
 * @param pArithExprInfo expression info; on success its pExpr field receives the tree
 * @param pQueryMsg      query message, used only as a log tag
 * @return TSDB_CODE_SUCCESS; the code caught by CATCH if tree construction raised one;
 *         TSDB_CODE_QRY_APP_ERROR if no tree was produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5710
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5711 5712
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5713
  int32_t code = TSDB_CODE_SUCCESS;
5714

H
Haojun Liao 已提交
5715
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5716
  if (pExprs == NULL) {
5717
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5718 5719 5720 5721 5722
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5723
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5724
    pExprs[i].base = *pExprMsg[i];
5725
    pExprs[i].bytes = 0;
5726 5727 5728 5729

    int16_t type = 0;
    int16_t bytes = 0;

5730
    // parse the arithmetic expression
5731
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5732
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5733

5734
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5735
        taosTFree(pExprs);
5736
        return code;
5737 5738
      }

5739
      type  = TSDB_DATA_TYPE_DOUBLE;
5740
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5741
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5742
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5743
      type = s.type;
H
Haojun Liao 已提交
5744
      bytes = s.bytes;
5745 5746
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5747 5748
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5749 5750
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5751 5752 5753 5754 5755

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5756
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5757
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5758

dengyihao's avatar
dengyihao 已提交
5759
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5760 5761 5762 5763
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5764
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5765

H
Haojun Liao 已提交
5766 5767 5768
        type  = s.type;
        bytes = s.bytes;
      }
5769 5770
    }

S
TD-1057  
Shengliang Guan 已提交
5771
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
5772
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5773
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5774
      taosTFree(pExprs);
5775
      return TSDB_CODE_QRY_INVALID_MSG;
5776 5777
    }

5778
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5779
      tagLen += pExprs[i].bytes;
5780
    }
5781
    assert(isValidDataType(pExprs[i].type));
5782 5783 5784
  }

  // TODO refactor
5785
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5786 5787
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5788

5789
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5790
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5791 5792 5793 5794 5795 5796 5797 5798 5799
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
5800 5801 5802
    }
  }

5803
  *pExprInfo = pExprs;
5804 5805 5806
  return TSDB_CODE_SUCCESS;
}

5807
/**
 * Create the group-by descriptor of a query.
 *
 * @param pQueryMsg query message supplying numOfGroupCols, orderType and orderByIdx
 * @param pColIndex array of numOfGroupCols group-by columns; each entry is copied into the result
 * @param code      set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails; left untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by column or an allocation failed
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the allocation above
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5831
/**
 * Build pQuery->pFilterInfo from the per-column filters found in pQuery->colList.
 *
 * Every column with at least one filter gets one SSingleColumnFilterInfo entry, and
 * every filter of that column gets a callback chosen from its relational operators:
 * a range callback when both a lower (> or >=) and an upper (< or <=) bound are
 * present, otherwise the value callback indexed by the single operator.
 *
 * @param pQInfo query handle, used only as a log tag
 * @param pQuery query whose numOfFilterCols and pFilterInfo are filled in
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG
 *
 * NOTE(review): numOfFilterCols is incremented without being reset first, and memory
 * allocated before a failure is left attached to pQuery - confirm the caller's cleanup.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  // i walks all columns, j walks the filtered columns only
  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // two-sided range: the slot depends on which bounds are inclusive
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5924
/**
 * Refresh the colIndex of every output expression so that it refers to the position
 * of its column inside pQuery->colList (normal columns) or pQuery->tagColList (tags).
 *
 * Arithmetic expressions and user-defined constant columns are left untouched.
 *
 * @param pQuery query whose pSelectExpr entries are updated in place
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name column has no entry in tagColList, so a miss is acceptable for it
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

5961
/**
 * Comparison callback ordering STableIdInfo records by ascending uid.
 *
 * @return 1 if a's uid is greater than b's, -1 if it is smaller, 0 if they are equal
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  if (lhs->uid == rhs->uid) {
    return 0;
  }

  return (lhs->uid > rhs->uid) ? 1 : -1;
}

dengyihao's avatar
dengyihao 已提交
5969 5970
// forward declaration; being static, the definition lives elsewhere in this file
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
5971 5972 5973
/**
 * Choose the result buffer capacity (in rows) and the return threshold of a query.
 *
 * Projection queries get as many rows as fit into RESULT_MSG_MIN_SIZE bytes, but at
 * least RESULT_MSG_MIN_ROWS; all other queries use a fixed, smaller capacity of 4096
 * rows. The threshold is RESULT_THRESHOLD_RATIO of the capacity.
 *
 * @param pQuery query whose rec.capacity and rec.threshold are set
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  if (!isProjQuery(pQuery)) {
    // in case of non-prj query, a smaller output buffer will be used.
    pQuery->rec.capacity = 4096;
    pQuery->rec.threshold = (int32_t)(pQuery->rec.capacity * RESULT_THRESHOLD_RATIO);
    return;
  }

  int32_t rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
  rows = (rows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : rows;

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

weixin_48148422's avatar
weixin_48148422 已提交
5990
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5991
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
B
Bomin Zhang 已提交
5992 5993 5994
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

5995 5996
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
B
Bomin Zhang 已提交
5997
    goto _cleanup_qinfo;
5998
  }
5999

B
Bomin Zhang 已提交
6000 6001 6002
  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;
6003 6004

  SQuery *pQuery = calloc(1, sizeof(SQuery));
B
Bomin Zhang 已提交
6005 6006 6007
  if (pQuery == NULL) {
    goto _cleanup_query;
  }
H
Haojun Liao 已提交
6008

6009 6010
  pQInfo->runtimeEnv.pQuery = pQuery;

6011
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
6012
  pQuery->numOfOutput     = numOfOutput;
6013 6014 6015
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
6016
  pQuery->order.orderColId = pQueryMsg->orderColId;
6017 6018 6019 6020
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
6021
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
6022
  pQuery->fillType        = pQueryMsg->fillType;
6023
  pQuery->numOfTags       = pQueryMsg->numOfTags;
B
Bomin Zhang 已提交
6024
  pQuery->tagColList      = pTagCols;
H
Haojun Liao 已提交
6025

6026
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
6027
  if (pQuery->colList == NULL) {
6028
    goto _cleanup;
6029
  }
6030

H
hjxilinx 已提交
6031
  for (int16_t i = 0; i < numOfCols; ++i) {
6032
    pQuery->colList[i] = pQueryMsg->colList[i];
6033
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
6034
  }
6035

6036
  // calculate the result row size
6037 6038 6039
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
6040
  }
6041

6042
  doUpdateExprColumnIndex(pQuery);
6043

6044
  int32_t ret = createFilterInfo(pQInfo, pQuery);
6045
  if (ret != TSDB_CODE_SUCCESS) {
6046
    goto _cleanup;
6047 6048 6049
  }

  // prepare the result buffer
6050
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
6051
  if (pQuery->sdata == NULL) {
6052
    goto _cleanup;
6053 6054
  }

H
Haojun Liao 已提交
6055
  calResultBufSize(pQuery);
6056

6057
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
6058
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
6059 6060

    // allocate additional memory for interResults that are usually larger then final results
6061
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
6062
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
6063
    if (pQuery->sdata[col] == NULL) {
6064
      goto _cleanup;
6065 6066 6067
    }
  }

6068
  if (pQuery->fillType != TSDB_FILL_NONE) {
6069 6070
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
6071
      goto _cleanup;
6072 6073 6074
    }

    // the first column is the timestamp
6075
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
6076 6077
  }

dengyihao's avatar
dengyihao 已提交
6078 6079 6080 6081 6082 6083
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
6084
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
H
Haojun Liao 已提交
6085
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
H
Haojun Liao 已提交
6086
  }
6087

weixin_48148422's avatar
weixin_48148422 已提交
6088 6089
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
6090
  taosArraySort(pTableIdList, compareTableIdInfo);
6091

H
Haojun Liao 已提交
6092
  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
H
Haojun Liao 已提交
6093
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
H
Haojun Liao 已提交
6094 6095 6096 6097
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

H
Haojun Liao 已提交
6098 6099
  int32_t index = 0;

H
hjxilinx 已提交
6100
  for(int32_t i = 0; i < numOfGroups; ++i) {
6101
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
6102

H
Haojun Liao 已提交
6103
    size_t s = taosArrayGetSize(pa);
6104
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
B
Bomin Zhang 已提交
6105 6106 6107
    if (p1 == NULL) {
      goto _cleanup;
    }
H
Haojun Liao 已提交
6108

Y
yihaoDeng 已提交
6109
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
6110

H
hjxilinx 已提交
6111
    for(int32_t j = 0; j < s; ++j) {
H
Haojun Liao 已提交
6112
      STableKeyInfo* info = taosArrayGet(pa, j);
6113

H
Haojun Liao 已提交
6114
      STableId* id = TSDB_TABLEID(info->pTable);
H
Haojun Liao 已提交
6115
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
6116

H
Haojun Liao 已提交
6117
      window.skey = (pTableId != NULL)? pTableId->key:pQueryMsg->window.skey;
S
TD-1057  
Shengliang Guan 已提交
6118
      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);
H
Haojun Liao 已提交
6119

H
Haojun Liao 已提交
6120
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
B
Bomin Zhang 已提交
6121 6122 6123
      if (item == NULL) {
        goto _cleanup;
      }
H
Haojun Liao 已提交
6124

6125
      item->groupIndex = i;
H
hjxilinx 已提交
6126
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
6127 6128
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
6129 6130
    }
  }
6131

weixin_48148422's avatar
weixin_48148422 已提交
6132
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
6133 6134
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pthread_mutex_init(&pQInfo->lock, NULL);
weixin_48148422's avatar
weixin_48148422 已提交
6135

6136
  pQuery->pos = -1;
6137
  pQuery->window = pQueryMsg->window;
6138
  colIdCheck(pQuery);
6139

6140
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
6141 6142
  return pQInfo;

B
Bomin Zhang 已提交
6143
_cleanup_qinfo:
H
Haojun Liao 已提交
6144
  tsdbDestroyTableGroup(pTableGroupInfo);
B
Bomin Zhang 已提交
6145 6146

_cleanup_query:
6147 6148 6149 6150
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
H
Haojun Liao 已提交
6151

S
Shengliang Guan 已提交
6152
  taosTFree(pTagCols);
B
Bomin Zhang 已提交
6153 6154 6155 6156 6157 6158
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }
H
Haojun Liao 已提交
6159

S
Shengliang Guan 已提交
6160
  taosTFree(pExprs);
B
Bomin Zhang 已提交
6161

6162
_cleanup:
dengyihao's avatar
dengyihao 已提交
6163
  freeQInfo(pQInfo);
6164 6165 6166
  return NULL;
}

H
hjxilinx 已提交
6167
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6168 6169 6170 6171
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6172

H
hjxilinx 已提交
6173 6174 6175 6176
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6177
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6178 6179 6180
  return (sig == (uint64_t)pQInfo);
}

6181
/*
 * Second stage of query handle setup: prepare the runtime of a freshly created SQInfo.
 *
 * Two situations finish the query immediately with TSDB_CODE_SUCCESS and the status set to
 * QUERY_COMPLETED: an empty time window for the requested scan order, and an empty table set.
 * Otherwise the optional timestamp buffer carried by the message is opened and doInitQInfo
 * performs the actual initialization.
 *
 * If doInitQInfo fails, pQInfo is freed here and the error code is returned; the caller must
 * not touch pQInfo afterwards.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // The early exits are evaluated before the timestamp buffer is created. They do not depend on
  // it, and creating it first left the buffer without an owner whenever one of them returned.
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decrease during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

B
Bomin Zhang 已提交
6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234
/*
 * Release an array of numOfFilters column filters, including the buffer
 * referenced by pz of every entry that has filterstr set.
 * A NULL array is accepted and ignored.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL) {
    int32_t idx = 0;

    while (idx < numOfFilters) {
      SColumnFilterInfo* pItem = &pFilter[idx++];

      // pz refers to separately allocated memory only when filterstr is set
      if (pItem->filterstr) {
        free((void*)(pItem->pz));
      }
    }

    free(pFilter);
  }
}

H
hjxilinx 已提交
6235 6236 6237 6238
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }
6239

6240
  qDebug("QInfo:%p start to free QInfo", pQInfo);
6241

6242
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);
6243

H
Haojun Liao 已提交
6244 6245 6246 6247 6248 6249 6250
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        taosTFree(pQuery->sdata[col]);
      }
      taosTFree(pQuery->sdata);
H
hjxilinx 已提交
6251
    }
6252

H
Haojun Liao 已提交
6253 6254 6255
    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }
6256

H
Haojun Liao 已提交
6257 6258 6259 6260
    for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
      SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
      if (pColFilter->numOfFilters > 0) {
        taosTFree(pColFilter->pFilters);
H
hjxilinx 已提交
6261
      }
H
hjxilinx 已提交
6262
    }
6263

H
Haojun Liao 已提交
6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279
    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo *pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }
6280

H
Haojun Liao 已提交
6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292
    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
H
hjxilinx 已提交
6293
  }
6294

6295
  // todo refactor, extract method to destroytableDataInfo
B
Bomin Zhang 已提交
6296
  if (pQInfo->tableqinfoGroupInfo.pGroupList != NULL) {
S
TD-1057  
Shengliang Guan 已提交
6297
    int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
B
Bomin Zhang 已提交
6298 6299 6300 6301 6302 6303
    for (int32_t i = 0; i < numOfGroups; ++i) {
      SArray *p = GET_TABLEGROUP(pQInfo, i);

      size_t num = taosArrayGetSize(p);
      for(int32_t j = 0; j < num; ++j) {
        STableQueryInfo* item = taosArrayGetP(p, j);
H
Haojun Liao 已提交
6304
        destroyTableQueryInfo(item);
6305
      }
6306

B
Bomin Zhang 已提交
6307 6308
      taosArrayDestroy(p);
    }
H
hjxilinx 已提交
6309
  }
6310

S
Shengliang Guan 已提交
6311
  taosTFree(pQInfo->pBuf);
6312
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
H
Haojun Liao 已提交
6313
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
H
Haojun Liao 已提交
6314
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
weixin_48148422's avatar
weixin_48148422 已提交
6315
  taosArrayDestroy(pQInfo->arrTableIdInfo);
6316

6317

6318
  pQInfo->signature = 0;
6319

6320
  qDebug("QInfo:%p QInfo is freed", pQInfo);
6321

S
Shengliang Guan 已提交
6322
  taosTFree(pQInfo);
H
hjxilinx 已提交
6323 6324
}

H
hjxilinx 已提交
6325
/*
 * Compute the number of payload bytes needed to return the current result.
 *
 * For an ordinary query this is rowSize * (*numOfRows). For a ts-comp query
 * that has rows, the payload is the file whose path is stored in the first
 * output buffer: its size is returned and also written back to *numOfRows.
 * If that file cannot be examined, an error is logged and 0 is returned.
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  /*
   * get the file size and set the numOfRows to be the file size, since for tsComp query,
   * the returned row size is equalled to 1
   * TODO handle the case that the file is too large to send back one time
   */
  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
6346

H
hjxilinx 已提交
6347 6348 6349
/*
 * Copy the current result of a query into the response buffer `data`.
 *
 * For a ts-comp query the result is the content of the temporary file whose path is stored in
 * the first output buffer; the file is read completely into `data` and then removed. For any
 * other query the rows are copied by doCopyQueryResultToMsg. Afterwards the running total is
 * updated, and the query is marked QUERY_OVER once the limit has been reached.
 *
 * Always returns TSDB_CODE_SUCCESS; I/O problems are only logged.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // kept signed: lseek reports failure as -1, which must not turn into a huge read length
      int64_t fileSize = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, fileSize);
      if (fileSize >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t copied = 0;
        int32_t readErr = 0;

        // read() may legitimately return fewer bytes than requested, so keep going until done
        while (copied < fileSize) {
          ssize_t n = read(fd, data + copied, (size_t)(fileSize - copied));
          if (n > 0) {
            copied += n;
          } else if (n < 0 && errno == EINTR) {
            continue;  // interrupted before any data was transferred, retry
          } else {
            readErr = (n < 0) ? errno : 0;  // n == 0: the file ended early
            break;
          }
        }

        if (copied < fileSize) {  // todo handle error
          qError("QInfo:%p failed to read ts comp data, path:%s, read:%" PRId64 ", expected:%" PRId64 ", reason:%s", pQInfo,
                 pQuery->sdata[0]->data, copied, fileSize, (readErr != 0) ? strerror(readErr) : "unexpected end of file");
          assert(0);
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6399 6400 6401 6402 6403 6404 6405
// Bookkeeping object for the query handles of one owner.
// NOTE(review): nothing in this part of the file reads or writes these fields; the notes marked
// "presumably" follow the field names only and should be confirmed against the code using them.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vnode this manager belongs to
  bool            closed;         // presumably set once the manager stops accepting new handles
  pthread_mutex_t lock;           // presumably guards the fields above
} SQueryMgmt;

6406
/*
 * Create a query handle from a query message.
 *
 * tsdb       storage engine handle the tables are looked up in (must not be NULL)
 * vgId       passed through to initQInfo
 * pQueryMsg  the query message (must not be NULL); the filters attached to its column list are
 *            released before this function returns, whatever the outcome
 * pQInfo     receives the new handle on success and NULL on failure
 *
 * Returns TSDB_CODE_SUCCESS or the error code of the step that failed.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // createQInfoImpl has taken over these three objects, whether it succeeded or not
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // pExprs is still set only if createQInfoImpl was never reached; the expression trees hanging
  // off the entries have to be destroyed as well, in the same way createQInfoImpl and freeQInfo do
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }

    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6532
/*
 * Public destructor of a query handle: logs the completion, prints the query
 * cost summary and frees the handle. Invalid handles are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

6543 6544 6545 6546 6547 6548 6549 6550 6551 6552
/*
 * Called by the executing thread when it stops working on the handle: marks
 * the result as ready and gives up ownership of the handle.
 *
 * Returns true if a response context has been registered on the handle (see
 * qRetrieveQueryResultInfo), false otherwise.
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  // dataReady and rspContext are changed together with the retrieve side, hence the lock
  pthread_mutex_lock(&pQInfo->lock);
  pQInfo->dataReady = QUERY_RESULT_READY;
  const bool hasRspContext = (pQInfo->rspContext != NULL);
  pthread_mutex_unlock(&pQInfo->lock);

  // clear qhandle owner
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  return hasRspContext;
}

6560
/*
 * Run (or continue) the query behind a handle on the calling thread.
 *
 * The handle is claimed by swapping its owner from 0 to the id of this thread;
 * if another thread already owns it, the code TSDB_CODE_QRY_IN_EXEC is stored
 * and false is returned. In every other case the function ends through
 * doBuildResCheck, which releases the ownership again, and returns its result.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  int64_t threadId = taosGetPthreadId();
  int64_t curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId);
  if (curOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) curOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  // (a non-zero value means execution came back to this point through the saved jump buffer)
  int32_t ret = setjmp(pRuntimeEnv->env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // pick the executor that matches the kind of query
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  // report how this round ended
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6616
/*
 * Ask whether the result of a query can be built right now.
 *
 * *buildRes is set to true if the result is ready. Otherwise it is set to
 * false and pRspContext is stored on the handle, so that the executing thread
 * can see a pending retrieve request (doBuildResCheck reports it).
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the
 * code currently stored on the handle.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  pthread_mutex_lock(&pQInfo->lock);

  const bool ready = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = ready;

  if (ready) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
  }

  // the code is read while the lock is still held
  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
6646

6647
/*
 * Build the retrieve response of a query.
 *
 * qinfo         the query handle
 * pRsp          receives the response, allocated with rpcMallocCont
 * contLen       receives the total length of the response in bytes
 * continueExec  set to false when the query is killed or over (the response is then flagged as
 *               completed), to true when more results are to be expected
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_QRY_OUT_OF_MEMORY if the
 * response cannot be allocated, otherwise the code stored on the handle.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload = result rows + an int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  *pRsp = rsp;
  if (rsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  const bool succeeded = (pQInfo->code == TSDB_CODE_SUCCESS);

  // header fields are sent in network byte order; the offset is only reported for a successful query
  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);
  if (succeeded) {
    rsp->offset = htobe64(pQuery->limit.offset);
  } else {
    rsp->offset = 0;
  }
  rsp->useconds  = htobe64(pRuntimeEnv->summary.elapsedTime);
  rsp->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && succeeded) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  // this result has been handed out, the next one has to be produced first
  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  const bool finished = IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
  if (finished) {
    *continueExec = false;
    rsp->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6700

6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711
/*
 * Tell whether a query has finished.
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1 if
 * the query has been killed or its status contains QUERY_OVER, and 0 if not.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return true;
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  return Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6712
/*
 * Mark a query as killed and block until no thread is executing it any more.
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise
 * TSDB_CODE_SUCCESS.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread being stopped/
  // Once the query is stopped, the owner of qHandle will be cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);  // poll again after 100 ms
  }

  return TSDB_CODE_SUCCESS;
}

6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745
/*
 * Write one tag value into the result buffer.
 *
 * output  destination inside the result buffer
 * val     the tag value, or NULL to write the null representation of the type
 * type    data type of the tag
 * bytes   width of the tag; used for every type except BINARY and NCHAR, whose length is
 *         taken from the value itself
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  // todo here stop will cause client crash
  size_t len;
  if (isVarLen) {
    len = varDataTLen(val);
  } else {
    len = bytes;
  }

  memcpy(output, val, len);
}

H
hjxilinx 已提交
6746 6747 6748
/**
 * Fill the output buffers of a query that only needs table meta data (tags,
 * table names, table ids) and therefore never scans data blocks.
 *
 * The function returns immediately, leaving the query untouched, when there is
 * no table group. Otherwise it handles one of three cases, selected by the
 * function id of the first select expression:
 *
 *  - TSDB_FUNC_TID_TAG: one var-data row per table in pQuery->sdata[0], laid
 *    out as [int16 0][uid as int64][tid as int32][vgId as int32][table name or
 *    tag value]. At most pQuery->rec.capacity rows are produced.
 *  - TSDB_FUNC_COUNT ("count(tbname)"): a single row holding the number of
 *    tables in the group; the super table query is marked as over.
 *  - anything else: one row per table with the requested tag values / table
 *    name in every non-constant output column, honoring limit.offset and capped
 *    by min(rec.capacity, limit.limit) when limit.limit is non-negative.
 *
 * pQInfo->tableIndex is used as the cursor into the table list and is advanced
 * for every table consumed (including tables skipped because of the offset).
 * On exit pQuery->rec.rows is the number of rows produced and the query status
 * is set to QUERY_COMPLETED.
 *
 * @param pQInfo  query handle whose runtime environment is filled.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;

    // default to the expression's own type/width, then prefer the definition
    // found in the tag column list when the column id matches
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The fields below land at offsets that are not guaranteed to be aligned
      // for their type, so they are serialized with memcpy instead of being
      // stored through casted pointers.
      int16_t zero = 0;
      memcpy(output, &zero, sizeof(zero));
      output += sizeof(int16_t);

      int64_t uid = (int64_t)id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = (int32_t)id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = (int32_t)pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // never produce more rows than the buffer holds or than the limit allows
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (pExprInfo[j].base.colInfo.flag == TSDB_COL_UDC) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

6879 6880 6881 6882 6883 6884 6885
/**
 * Fetch the response context stored in a query handle.
 *
 * @param qinfo  opaque query handle; must not be NULL (checked by assert only).
 * @return the rspContext field of the handle.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* pRspCtx = pInfo->rspContext;
  return pRspCtx;
}

6886 6887 6888 6889 6890 6891 6892
/**
 * Release callback for entries of the query handle pool (it is passed to
 * taosCacheInit() in qOpenQueryMgmt()).
 *
 * @param qhandle  address of the slot that stores the query handle; nothing is
 *                 done when either the slot address or the stored handle is NULL.
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle != NULL && *ppHandle != NULL) {
    // stop the query first, then destroy it
    qKillQuery(*ppHandle);
    qDestroyQueryInfo(*ppHandle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
6897
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
6898 6899 6900 6901

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

6902
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
6903 6904 6905 6906
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
6907

6908 6909 6910 6911 6912
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
6913 6914

  qDebug("vgId:%d, open querymgmt success", vgId);
6915
  return pQueryMgmt;
6916 6917
}

H
Haojun Liao 已提交
6918
/**
 * Callback handed to taosCacheRefresh() by qQueryMgmtNotifyClosed(): kills the
 * query stored in one pool entry.
 *
 * @param handle  address of the slot that stores the query handle; it is
 *                dereferenced without a NULL check.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void* pStoredQInfo = *(void**)handle;
  qKillQuery(pStoredQInfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
6924 6925 6926 6927 6928 6929 6930
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
6931
//  pthread_mutex_lock(&pQueryMgmt->lock);
6932
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
6933
//  pthread_mutex_unlock(&pQueryMgmt->lock);
6934

H
Haojun Liao 已提交
6935
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952
}

/**
 * Destroy a query manager: its handle pool, its mutex and the object itself.
 *
 * The manager is expected to have been marked closed beforehand (asserted).
 *
 * @param pQMgmt  query manager (SQueryMgmt); NULL is accepted and ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  // remember the id now, it is logged after the object has been freed
  const int32_t vgId = pMgmt->vgId;

  assert(pMgmt->closed);

  // detach the pool from the manager before tearing it down
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  taosTFree(pMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

6958
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6959 6960 6961 6962
  if (pMgmt == NULL) {
    return NULL;
  }

6963
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
6964

6965 6966
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
6967
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
6968 6969 6970
    return NULL;
  }

H
Haojun Liao 已提交
6971
//  pthread_mutex_lock(&pQueryMgmt->lock);
6972
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
6973
//    pthread_mutex_unlock(&pQueryMgmt->lock);
6974
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
6975 6976
    return NULL;
  } else {
6977 6978 6979
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
6980
//    pthread_mutex_unlock(&pQueryMgmt->lock);
6981 6982 6983 6984 6985

    return handle;
  }
}

6986
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6987 6988 6989 6990 6991 6992
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6993
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6994 6995 6996 6997 6998 6999 7000
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7001
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7002 7003 7004 7005 7006
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7007
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7008 7009 7010
  return 0;
}

7011