qExecutor.c 245.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
37
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
38 39
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

40
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
41
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
42
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
43
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
44

H
Haojun Liao 已提交
45
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
46

47
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
48
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
49

H
Haojun Liao 已提交
50 51
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy both bounds (skey and ekey) of the time window _src into _dst.
 *
 * The arguments are parenthesised so that expressions such as *ptr can be passed.
 * Each argument is expanded twice, so it must be free of side effects.
 *
 * NOTE: the expansion already ends with ';' (kept so that existing call sites keep
 * compiling). As a consequence the macro must not be used as the unbraced body
 * of an 'if' that is followed by an 'else'.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0);

57
enum {
H
hjxilinx 已提交
58
  // this status is set when the query starts to execute
59 60
  QUERY_NOT_COMPLETED = 0x1u,

H
hjxilinx 已提交
61 62
  /* The result output buffer is full and the current query is paused.
   * This status only exists in group-by clause and diff/add/division/multiply queries.
63
   */
64 65
  QUERY_RESBUF_FULL = 0x2u,

H
hjxilinx 已提交
66 67 68
  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window, it is also denoted as query_completed
69
   */
70
  QUERY_COMPLETED = 0x4u,
71

H
hjxilinx 已提交
72 73
  /* when the result is not completed return to client, this status will be
   * usually used in case of interval query with interpolation option
74
   */
75
  QUERY_OVER = 0x8u,
76
};
77 78

enum {
79 80
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
81 82 83
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Fault-injection allocators (compiled out). Each wrapper draws a number from rand()
 * and either reports an allocation failure (NULL) or forwards to the libc routine.
 * The redirection macros are defined after the wrappers, so the wrappers themselves
 * still call the real malloc/calloc/realloc.
 */

// malloc() stand-in: fails when rand() % 1000 is 0
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t dice = rand();
  return (dice % 1000 <= 0) ? NULL : malloc(__size);
}

// calloc() stand-in: fails when rand() % 1000 is 0
static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t dice = rand();
  return (dice % 1000 <= 0) ? NULL : calloc(num, __size);
}

// realloc() stand-in: fails when rand() % 5 is 0 or 1
static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t dice = rand();
  return (dice % 5 <= 1) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Move the time window *tw to the next window in scan direction.
 *
 * For interval units other than 'n' and 'y' the start key is shifted by the sliding
 * value (multiplied by the direction factor) and the end key is derived from the
 * interval length. For 'n' and 'y' the window is moved on the calendar through
 * localtime_r()/mktime(); a 'y' interval is counted as 12 months per unit.
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  const int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  const bool    byCalendar = (pQuery->interval.intervalUnit == 'n') || (pQuery->interval.intervalUnit == 'y');

  if (!byCalendar) {
    tw->skey += pQuery->interval.sliding * direction;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  const bool microPrecision = (pQuery->precision == TSDB_TIME_PRECISION_MICRO);

  // scale the start key down before handing it to localtime_r() as a time_t
  int64_t seconds = tw->skey / 1000;
  if (microPrecision) {
    seconds /= 1000;
  }

  // length of the window, counted in months
  int64_t months = pQuery->interval.interval;
  if (pQuery->interval.intervalUnit == 'y') {
    months *= 12;
  }

  struct tm tm;
  time_t t = (time_t)seconds;
  localtime_r(&t, &tm);

  // start of the next window
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + months * direction);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->skey = mktime(&tm) * 1000L;

  // start of the window after that one; turned into the inclusive end key below
  mon = (int)(mon + months);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = mktime(&tm) * 1000L;

  if (microPrecision) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }

  tw->ekey -= 1;
}

#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))
H
Haojun Liao 已提交
174

175 176
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

181
static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
182 183
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
static void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow);
184
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
187
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
188

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
197 198
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
199

200
/*
 * Check whether the row at position elemPos passes the column filters of the query.
 *
 * A row qualifies only if, for every filtered column, at least one filter element of
 * that column accepts the value. A NULL value is accepted by the isNull_filter only;
 * a non-NULL value is accepted by the notNull_filter, never by the isNull_filter, and
 * otherwise by whatever the filter function itself returns.
 *
 * @return true if the row passes all column filters, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pVal = (char*)pColFilter->pData + pColFilter->info.bytes * elemPos;

    bool accepted = false;
    for (int32_t f = 0; f < pColFilter->numOfFilters && !accepted; ++f) {
      SColumnFilterElem *pElem = &pColFilter->pFilters[f];

      if (isNull(pVal, pColFilter->info.type)) {
        accepted = (pElem->fp == isNull_filter);
        continue;
      }

      if (pElem->fp == notNull_filter) {
        accepted = true;
      } else if (pElem->fp != isNull_filter) {
        if (pElem->fp(pElem, pVal, pVal)) {
          accepted = true;
        }
      }
    }

    if (!accepted) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output columns of the query.
 *
 * When the query has a main output (hasMainOutput()), the ts, tag and tagprj
 * functions are skipped: they cannot decide the number of output rows, which is
 * determined by the main output.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  const bool mainOutputExists = hasMainOutput(pQuery);

  int64_t numOfRes = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;

    bool auxiliary = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TAG) || (funcId == TSDB_FUNC_TAGPRJ);
    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pCell == NULL) {
      continue;
    }

    if (numOfRes < pCell->numOfRes) {
      numOfRes = pCell->numOfRes;
    }
  }

  assert(numOfRes >= 0);
  return numOfRes;
}

268 269 270 271 272
/*
 * The number of results of each output column needs to be updated after the offset
 * value has been updated: numOfRes is written into every function context except
 * those of the ts, tag, tagprj and ts_dummy functions.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    int16_t             funcId = pRuntimeEnv->pCtx[i].functionId;

    bool auxiliary = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TAG) || (funcId == TSDB_FUNC_TAGPRJ) ||
                     (funcId == TSDB_FUNC_TS_DUMMY);
    if (!auxiliary) {
      // the new value is expected to be smaller than the current one
      assert(pCell->numOfRes > numOfRes);
      pCell->numOfRes = numOfRes;
    }
  }
}

H
Haojun Liao 已提交
288
// Map a group index to a result id: 20000000 + groupIndex * 10000.
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t offset = 20000000;
  const int32_t stride = 10000;

  return offset + (groupIndex * stride);
}

/*
 * Tell whether the group-by expression contains a normal column.
 *
 * @return false if pGroupbyExpr is NULL, has no group-by columns, or none of its
 *         columns is flagged as a normal column; true otherwise
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression.
 *
 * The column id of that group-by column is looked up in pQuery->colList; if no entry
 * with that id exists (or no group-by column is a normal column), TSDB_DATA_TYPE_NULL
 * is returned.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 is the initial value that is kept when no group-by column is a normal column
  int32_t groupColId = -2;
  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  int16_t colType = TSDB_DATA_TYPE_NULL;
  int32_t c = 0;
  while (c < pQuery->numOfCols) {
    if (groupColId == pQuery->colList[c].colId) {
      colType = pQuery->colList[c].type;
      break;
    }
    ++c;
  }

  return colType;
}

/*
 * Tell whether the query combines at least one selectivity function with a
 * tag_dummy or ts_dummy output column.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    sawDummy = false;
  int32_t selectivityCount = 0;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      sawDummy = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount += 1;
    }
  }

  return (selectivityCount > 0) && sawDummy;
}

360 361 362 363 364 365 366 367 368 369 370
// A query is a projection query if every output column uses the PRJ or TAGPRJ function.
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool projection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);
    if (!projection) {
      return false;
    }

    ++idx;
  }

  return true;
}

371
// True if the first output expression of the query is the TS_COMP function.
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirst = &pQuery->pSelectExpr[0];
  return pFirst->base.functionId == TSDB_FUNC_TS_COMP;
}
372

373 374 375
/*
 * Enforce the LIMIT of the query on the rows produced so far.
 *
 * If a limit is set and rec.total + rec.rows exceeds it, rec.rows is cut down to the
 * remaining quota and the query status is set to QUERY_COMPLETED.
 *
 * @return true if the rows were truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool exceeded = (pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit);
  if (!exceeded) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

// True if any output column other than a TS column uses the TOP or BOTTOM function.
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool topOrBottom = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM);
    if (fid != TSDB_FUNC_TS && topOrBottom) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
/*
 * Tell whether the query outputs tag values: either it is a single-column TS_COMP
 * query (the ts_comp column requires the tag value for the join filter), or at least
 * one output expression refers to a tag column, by which the results are aggregated.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  if (pQuery->numOfOutput == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

423 424 425 426 427 428 429 430
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
431
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
432
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
433 434
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
435 436
  } else {
    *pColStatis = NULL;
437
  }
438

H
Haojun Liao 已提交
439
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
440 441 442
    return false;
  }

443 444 445
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
446

447 448 449
  return true;
}

H
Haojun Liao 已提交
450
/*
 * Look up the result row that belongs to the key built from (pData, bytes, uid) and
 * make it the current row of pWindowResInfo; create the row if it does not exist.
 *
 * @param pRuntimeEnv     runtime environment; its keyBuf, result-row hash table, row
 *                        pool and jump buffer are used
 * @param pWindowResInfo  list of result rows; may be enlarged
 * @param pData           key data
 * @param bytes           length of the key data
 * @param masterscan      new rows are only created when this is true
 * @param uid             id that is combined with pData to form the hash key
 * @return the result row at curIndex, or NULL if the key is unknown and masterscan
 *         is false
 *
 * Does not return on failure: longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY when the row list cannot be enlarged or the new row
 * cannot be initialized, and with TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when the number
 * of rows exceeds MAX_INTERVAL_TIME_WINDOW.
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);
  int32_t *p1 =
      (int32_t *)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCapacity = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      /*
       * The truncated growth factor leaves a capacity of 0 or 1 unchanged. Always
       * make room for the slot at index 'size' that is written below, otherwise the
       * store would land beyond the end of the array.
       */
      if (newCapacity <= (int64_t)pWindowResInfo->size) {
        newCapacity = (int64_t)pWindowResInfo->size + 4;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SResultRow **)t;

      // clear the newly added slots
      int32_t inc = (int32_t)newCapacity - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, POINTER_BYTES * inc);

      pWindowResInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = getNewResultRow(pRuntimeEnv->pool);
    pWindowResInfo->pResult[pWindowResInfo->size] = pResult;
    int32_t ret = initResultRow(pResult);
    if (ret != TSDB_CODE_SUCCESS) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes),
                (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The candidate window is taken from the stored prevSKey (when no window is active
 * yet, i.e. curIndex == -1) or from the currently active result row. If ts lies
 * outside of it, the window is re-aligned: for 'n'/'y' units through
 * taosTimeTruncate()/taosTimeAdd(), otherwise by moving the start key in multiples of
 * the sliding value. Windows are closed intervals [skey, ekey].
 *
 * @param pWindowResInfo  window result list (curIndex, prevSKey and rows are read)
 * @param ts              timestamp that has to be covered
 * @param pQuery          query providing the interval settings, precision, order and
 *                        the queried time range
 * @return the window; its ekey is capped at pQuery->window.ekey for ascending queries
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // taosTimeAdd() yields the start of the following window; the inclusive end key
      // is one tick earlier, exactly as in the re-alignment branch below
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // move the start key backwards in sliding steps until it is not after ts
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // move the start key forwards in sliding steps until the window reaches ts
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

H
Haojun Liao 已提交
554
/*
 * Assign a position (page id and row id) in the disk-based result buffer to a result
 * row that does not have one yet.
 *
 * The last page registered for tid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or if no page exists) a new page is requested.
 *
 * @return 0 if the row already had a page or a position was assigned, -1 if no page
 *         could be obtained
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;

  SIDList list = getDataBufPagesIdList(pResultBuf, tid);
  if (taosArrayGetSize(list) == 0) {
    pPage = getNewDataBuf(pResultBuf, tid, &pageId);
  } else {
    SPageInfo* pLast = getLastPageInfo(list);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, tid, &pageId);

      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  }

  if (pPage == NULL) {
    return -1;
  }

  // the row has no page yet (checked on entry): take the next free row of the page
  pWindowRes->pageId = pageId;
  pWindowRes->rowId = (int32_t)(pPage->num++);
  assert(pWindowRes->pageId >= 0);

  return 0;
}

H
Haojun Liao 已提交
599
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, SDataBlockInfo* pBockInfo,
600
                                       STimeWindow *win, bool masterscan, bool* newWind) {
601 602
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
603

H
Haojun Liao 已提交
604 605
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, pBockInfo->uid);
  if (pResultRow == NULL) {
606 607 608
    *newWind = false;

    return masterscan? -1:0;
609
  }
610

611
  *newWind = true;
H
Haojun Liao 已提交
612

613
  // not assign result buffer yet, add new result buffer
H
Haojun Liao 已提交
614 615
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, pBockInfo->tid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
616
    if (ret != TSDB_CODE_SUCCESS) {
617 618 619
      return -1;
    }
  }
620

621
  // set time window for current result
H
Haojun Liao 已提交
622 623
  pResultRow->win = (*win);
  setWindowResOutputBufInitCtx(pRuntimeEnv, pResultRow);
624 625 626
  return TSDB_CODE_SUCCESS;
}

627
// Return the 'closed' flag of the result row stored in the given slot.
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SResultRow *pRow = pWindowResInfo->pResult[slot];
  return pRow->closed;
}

H
Haojun Liao 已提交
632
/*
 * Count how many rows of a block, starting at pos and moving in scan direction, are
 * covered up to the key ekey.
 *
 * Ascending order searches pData[pos .. numOfRows-1], descending order searches
 * pData[0 .. pos]. The value returned by searchFn is used as an index into the
 * searched range; a negative value leaves the step count at 0. One more step is
 * added when the row found equals ekey. The result is asserted to be positive.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order == TSDB_ORDER_ASC) {
    int32_t hit = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (hit >= 0) {
      steps = hit;
      steps += (pData[hit + pos] == ekey) ? 1 : 0;
    }
  } else {
    int32_t hit = searchFn((char *)pData, pos + 1, ekey, order);
    if (hit >= 0) {
      steps = pos - hit;
      steps += (pData[hit] == ekey) ? 1 : 0;
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
663
/*
 * Close the time windows that are finished with respect to lastKey and update the
 * query status accordingly.
 *
 * Nothing is done (and the number of windows is returned) unless this is the master
 * scan of an interval query with at least one window. If lastKey has reached the end
 * key of the current table's window, all windows are closed and the query is marked
 * QUERY_COMPLETED | QUERY_RESBUF_FULL. Otherwise windows are closed one by one up to
 * the first one that is still open, which becomes the current window; the query is
 * marked QUERY_RESBUF_FULL when more windows than the threshold were already closed.
 *
 * @return the number of windows found already closed while scanning the list (0 when
 *         all windows were closed at once), or the number of windows on early exit
 */
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // only the master scan of an interval query with existing windows is checked
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery)) || pWindowResInfo->size == 0) {
    return pWindowResInfo->size;
  }

  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);
  int32_t    numOfClosed = 0;

  bool reachedEnd = ascending ? (lastKey >= pQuery->current->win.ekey) : (lastKey <= pQuery->current->win.ekey);
  if (reachedEnd) {  // query completed
    closeAllTimeWindow(pWindowResInfo);

    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int64_t firstOpenSKey = TSKEY_INITIAL_VAL;
    int32_t idx = 0;

    for (; idx < pWindowResInfo->size; ++idx) {
      SResultRow *pRow = pWindowResInfo->pResult[idx];
      if (pRow->closed) {
        numOfClosed += 1;
        continue;
      }

      bool passed = ascending ? (pRow->win.ekey <= lastKey) : (pRow->win.skey >= lastKey);
      if (!passed) {
        firstOpenSKey = pRow->win.skey;
        break;
      }

      closeTimeWindow(pWindowResInfo, idx);
    }

    if (firstOpenSKey == TSKEY_INITIAL_VAL) {
      // all windows are closed, set the last one to be the skey
      assert(idx == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = idx;
    }

    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex]->win.skey;

    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);

      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
    } else {
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
             numOfClosed);
    }
  }

  // output has reached the limitation, set query completed
  bool limitReached = pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed;
  if (limitReached && pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
  return numOfClosed;
}

/*
 * Count the rows of the data block that belong to the time window ending at ekey,
 * starting at startPos and moving in the scan direction of the query.
 *
 * If the window ends inside the block, the count comes from getForwardStepsInBlock();
 * otherwise all remaining rows of the block (in scan direction) are counted. When
 * updateLastKey is set, the lastKey of the current table is moved one step past the
 * last counted timestamp, respectively past the block boundary.
 *
 * @return the number of rows, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* pTableInfo = pQuery->current;
  int32_t          numOfRows = -1;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey >= pDataBlockInfo->window.ekey) {
      // the window reaches the end of the block
      numOfRows = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        pTableInfo->lastKey = pDataBlockInfo->window.ekey + step;
      }
    } else {
      numOfRows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {
        pTableInfo->lastKey = pPrimaryColumn[startPos + (numOfRows - 1)] + step;
      }
    }
  } else {  // desc
    if (ekey <= pDataBlockInfo->window.skey) {
      // the window reaches the start of the block
      numOfRows = startPos + 1;
      if (updateLastKey) {
        pTableInfo->lastKey = pDataBlockInfo->window.skey + step;
      }
    } else {
      numOfRows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {
        pTableInfo->lastKey = pPrimaryColumn[startPos - (numOfRows - 1)] + step;
      }
    }
  }

  assert(numOfRows > 0);
  return numOfRows;
}

H
Haojun Liao 已提交
776 777
/*
 * Apply every output function of the query to a range of rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * @param pRuntimeEnv  runtime environment holding the query and the function contexts
 * @param closed       whether the time window is already closed
 * @param pWin         time window; its skey becomes the start timestamp of each context
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsCol        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      // remember the flag of this very context, so that exactly this value is
      // restored below (the flag of pCtx[0] is not valid for the other contexts)
      bool hasPrev = pCtx[k].preAggVals.isSet;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      // NOTE: the original value of isSet have been changed here
      if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }

      // restore it
      pCtx[k].preAggVals.isSet = hasPrev;
    }
  }
}

810
/*
 * Apply every output function of the query to the single row at the given offset.
 * Nothing is done unless this is the master scan or the window is closed.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCur = &pCtx[i];
    pCur->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pSelectExpr[i].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pCur, fid)) {
      continue;
    }

    aAggs[fid].xFunctionF(pCur, offset);
  }
}

H
Haojun Liao 已提交
826 827
/*
 * Advance *pNext to the following time window and locate the row of the
 * current block at which processing of that window should start.
 *
 * Returns -1 when the advanced window lies completely outside the block in
 * scan direction, otherwise the start position inside the block. If the row
 * at the start position falls outside the advanced window, *pNext is moved
 * further so that it covers that row.
 *
 * NOTE(review): primaryKeys is indexed unconditionally below, while
 * blockwiseApplyFunctions may hand in a NULL timestamp array when no data
 * block is supplied -- confirm that this path cannot be reached with NULL.
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // the advanced window does not intersect the current block
  if (asc) {
    if (pNext->skey > pDataBlockInfo->window.ekey) {
      return -1;
    }
  } else {
    if (pNext->ekey < pDataBlockInfo->window.skey) {
      return -1;
    }
  }

  // key to search for: the leading edge of the window, clamped to the query start key
  TSKEY startKey = asc ? pNext->skey : pNext->ekey;
  if (asc) {
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  int32_t startPos = 0;
  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling window (sliding == interval): continue right after the previous position
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * The window may not cover any row (e.g. when the window is very small);
   * in that case jump to the window that contains the row at startPos.
   */
  TSKEY next = primaryKeys[startPos];
  bool  emptyWindow = asc ? (next > pNext->ekey) : (next < pNext->skey);

  if (emptyWindow) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
      pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else if (asc) {
      pNext->ekey += ((next - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
    } else {
      pNext->skey -= ((pNext->skey - next + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
    }
  }

  return startPos;
}

H
Haojun Liao 已提交
887
/*
 * Return the trailing edge of pWindow in scan direction, clamped so that it
 * never passes pQuery->window.ekey: the window's ekey for ascending queries,
 * its skey otherwise.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY bound = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > bound) ? bound : pWindow->ekey;
  }

  return (pWindow->skey < bound) ? bound : pWindow->skey;
}

H
hjxilinx 已提交
904 905
// Scan the columns of pDataBlock front to back and return the data buffer of
// the first column whose id equals colId, or NULL if there is no such column.
// todo binary search
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
919
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
920 921 922
  if (pDataBlock == NULL) {
    return NULL;
  }
923

H
Haojun Liao 已提交
924
  char *dataBlock = NULL;
H
Haojun Liao 已提交
925
  SQuery *pQuery = pRuntimeEnv->pQuery;
926

927
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
928
  if (functionId == TSDB_FUNC_ARITHM) {
929
    sas->pArithExpr = &pQuery->pSelectExpr[col];
930

931 932 933 934
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
935

H
Haojun Liao 已提交
936 937 938 939
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

940
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
941
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
942
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
943
      SColumnInfo *pColMsg = &pQuery->colList[i];
944

945 946 947 948 949 950 951 952
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
953

954
      assert(dataBlock != NULL);
955
      sas->data[i] = dataBlock;  // start from the offset
956
    }
957

958
  } else {  // other type of query function
959
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
960
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
961 962 963 964 965
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
966 967
    } else {
      dataBlock = NULL;
968 969
    }
  }
970

971 972 973 974
  return dataBlock;
}

/**
H
Haojun Liao 已提交
975
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
976 977
 * @param pRuntimeEnv
 * @param forwardStep
978
 * @param tsCols
979 980 981 982 983
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
984
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
985 986
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
987
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
988 989
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

990 991
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
992
  if (pDataBlock != NULL) {
993
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
994
    tsCols = (TSKEY *)(pColInfo->pData);
995
  }
996

997
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
998 999 1000
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1001

H
Haojun Liao 已提交
1002
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1003
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1004
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1005
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1006
  }
1007

1008
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1009
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1010
    TSKEY ts = TSKEY_INITIAL_VAL;
1011

H
Haojun Liao 已提交
1012 1013 1014 1015 1016 1017 1018 1019
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1020
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1021
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
1022
      tfree(sasArray);
H
hjxilinx 已提交
1023
      return;
1024
    }
1025

H
Haojun Liao 已提交
1026 1027 1028
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1029
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1030
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1031
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1032

1033
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1034
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1035
    }
1036

1037 1038
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1039

1040
    while (1) {
H
Haojun Liao 已提交
1041 1042
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1043 1044 1045
      if (startPos < 0) {
        break;
      }
1046

1047
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1048
      hasTimeWindow = false;
H
Haojun Liao 已提交
1049
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1050 1051
        break;
      }
1052

1053 1054 1055 1056 1057
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1058
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1059

1060 1061
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1062
    }
1063

1064 1065 1066 1067 1068 1069 1070
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1071
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1072
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1073 1074 1075 1076 1077
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1078

1079 1080 1081 1082
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1083

S
TD-1848  
Shengliang Guan 已提交
1084
    tfree(sasArray[i].data);
1085
  }
1086

S
TD-1848  
Shengliang Guan 已提交
1087
  tfree(sasArray);
1088 1089
}

1090
/*
 * Select (creating it if necessary) the result row for the group identified
 * by the group-by column value pData and make it the current output buffer.
 *
 * @param pData       value of the group-by column for the current row
 * @param type        data type of the column
 * @param bytes       width of the column; for BINARY/NCHAR the actual value
 *                    length is taken from the value itself
 * @param groupIndex  passed to doPrepareResultRowFromKey as the uid
 * @return TSDB_CODE_SUCCESS, or -1 when the value is null, no result row
 *         could be prepared, or no result buffer page could be attached.
 *
 * FLOAT/DOUBLE group keys are not supported: the function logs an error and
 * jumps to pRuntimeEnv->env with TSDB_CODE_QRY_APP_ERROR. It jumps with
 * TSDB_CODE_QRY_OUT_OF_MEMORY if the key copy cannot be allocated.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
  bool varLenKey = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // key bytes used to look up the result row
  char* d = pData;
  int16_t len = bytes;
  if (varLenKey) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  uint64_t uid = groupIndex;  // the group index serves as the uid of the result row
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, uid);
  if (pResultRow == NULL) {
    return -1;
  }

  if (varLenKey) {
    /*
     * This function runs once per input row, so the same result row is
     * returned again for every further row of the group. Copy the key only
     * once; allocating a new copy on each call leaked the previous buffer.
     * Assumes a newly created result row starts with key == NULL -- TODO confirm.
     */
    if (pResultRow->key == NULL) {
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pResultRow->key, pData);
    }
  } else {
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
    }

    // the group value is stored as both ends of the row's window
    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  // no result buffer page attached to this row yet
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1147
/*
 * Find the data buffer of a non-tag group-by column inside pDataBlock.
 *
 * Group-by columns are visited in order and tag columns are skipped. For
 * every remaining column *type and *bytes are set from the query column
 * list; the buffer of the first one that is present in the data block is
 * returned. Returns NULL if none is found.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      continue;
    }

    // position of this column in the query column list
    int32_t colId = pColIndex->colId;
    int16_t colIndex = -1;

    int32_t c = 0;
    while (c < pQuery->numOfCols) {
      if (pQuery->colList[c].colId == colId) {
        colIndex = c;
        break;
      }
      ++c;
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     * The colIndex is acquired from the first table of all qualified tables in this vnode during the query
     * prepare stage; the remaining tables may not actually have the required column in cache. So the data
     * block is searched by column id instead of trusting the index.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

    int32_t b = 0;
    while (b < numOfBlockCols) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pColIndex->colId) {
        return pColData->pData;
      }
      ++b;
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` with the current element of the timestamp
 * buffer (pRuntimeEnv->pTSBuf).
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tag of the first function context
 * differs from the element's tag, TS_JOIN_TS_NOT_EQUALS when the row key has
 * not reached the element's timestamp yet in scan direction, and
 * TS_JOIN_TS_EQUAL otherwise. A row key that is already past the element's
 * timestamp trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  // key of the row, read from the input buffer of the first context
  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // NOTE(review): elem.tag is handed to tVariantCompare directly above, next to &pCtx[0].tag, which suggests
  // it is a pointer; the '.' access below may therefore not compile when _DEBUG_VIEW is defined -- confirm.
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  bool notReached = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (notReached) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row key is already beyond the element of the timestamp buffer
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function identified by functionId has to be invoked for
 * the given context in the current scan.
 *
 * The checks are applied in this order:
 *   1. TSDB_FUNC_TS                          -> always executed
 *   2. result already complete, or one of
 *      the TAG_DUMMY / TS_DUMMY functions    -> never executed
 *   3. first / first_dst                     -> only for ascending queries
 *   4. last / last_dst                       -> only when param[0] equals the query order
 *   5. everything else                       -> executed unless this is the reverse scan
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  if (pResInfo->complete) {
    return false;
  }

  if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // none of the remaining functions is executed during the reverse scan
  return IS_REVERSE_SCAN(pRuntimeEnv) ? false : true;
}

1254 1255
/*
 * Apply the output functions of the query to the current data block one row
 * at a time, through the per-row entry point (xFunctionF) of each function.
 *
 * Per row, in this order:
 *   - when a timestamp buffer is attached, the row is matched against it
 *     (doTSJoinFilter); a tag mismatch ends the scan of the block, a
 *     timestamp mismatch skips the row;
 *   - rows rejected by the column filters are skipped;
 *   - interval queries: the functions are applied for the active time window
 *     of the row and for every following window that still contains the row
 *     timestamp;
 *   - other queries: the group result buffer is selected first when grouping
 *     by a normal column, then every function is applied to the row;
 *   - when a timestamp buffer is attached it is advanced; if that is not
 *     possible the query is marked completed and the scan ends.
 *
 * Afterwards the last key (and, with a timestamp buffer, its cursor) of the
 * current table is recorded.
 *
 * Jumps to pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY if the arithmetic
 * support array cannot be allocated.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pTableInfo = pQuery->current;

  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
  bool groupByNormalCol = pRuntimeEnv->groupbyNormalCol;

  // timestamps are only available when the first column of the block is a timestamp column
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY*) pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByNormalCol) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind input buffers and execution parameters of every output column
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[i], i, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[i], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[i], i, pQInfo->vgId);
  }

  // set the input column data
  for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[i];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow);

      // null data, too many state code; or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // the row may belong to further windows as well; curIndex is restored afterwards
      int32_t     savedIndex = pWindowResInfo->curIndex;
      STimeWindow nextWin = win;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (!hasTimeWindow) {
          continue;
        }

        closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByNormalCol) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, pTableInfo->groupIndex);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
        if (!functionNeedToExecute(pRuntimeEnv, &pCtx[i], functionId)) {
          continue;
        }

        aAggs[functionId].xFunctionF(&pCtx[i], offset);
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);

  // record where the scan of this table has to continue
  if (tsCols == NULL) {
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  } else {
    pTableInfo->lastKey = tsCols[offset] + step;
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    pTableInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // todo refactor: extract method
  // only arithmetic expressions own a data pointer array
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1420
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1421
  SQuery *pQuery = pRuntimeEnv->pQuery;
1422

H
hjxilinx 已提交
1423 1424
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1425

H
Haojun Liao 已提交
1426
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1427
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1428
  } else {
1429
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1430
  }
1431

1432
  // update the lastkey of current table
1433
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1434
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1435

1436
  // interval query with limit applied
1437
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1438
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1439 1440
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1441
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1442

1443 1444 1445 1446
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1447

1448 1449 1450
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1451

1452 1453 1454
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1455 1456 1457 1458 1459

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1460
    }
1461
  }
1462

1463
  return numOfRes;
1464 1465
}

H
Haojun Liao 已提交
1466
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1467
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1468

1469 1470
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1471

1472
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1473
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1474
  pCtx->aInputElemBuf = inputData;
1475

1476
  if (tpField != NULL) {
H
Haojun Liao 已提交
1477
    pCtx->preAggVals.isSet  = true;
1478 1479
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1480 1481 1482
  } else {
    pCtx->preAggVals.isSet = false;
  }
1483

H
Haojun Liao 已提交
1484 1485
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1486 1487
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1488

H
Haojun Liao 已提交
1489
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1490 1491
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1492

1493 1494
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1495
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1496
  }
1497

1498 1499 1500 1501 1502
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1503
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1504
    /*
H
Haojun Liao 已提交
1505
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1506 1507 1508 1509 1510 1511
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
H
Haojun Liao 已提交
1512 1513
      SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);
      STwaInfo *pTWAInfo = (STwaInfo*) GET_ROWCELL_INTERBUF(pInfo);
1514 1515 1516
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1517

1518 1519
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1520 1521 1522 1523 1524 1525
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1526
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1527 1528 1529
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1530
    pInterpInfo->type = (int8_t)pQuery->fillType;
1531 1532
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1533

1534 1535 1536 1537
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1538 1539 1540
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1541 1542
      }
    }
H
Haojun Liao 已提交
1543 1544 1545
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1546
  }
1547

1548 1549 1550 1551 1552 1553
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1554
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1555 1556 1557
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1558
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1559 1560 1561 1562 1563 1564
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1565
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1566 1567
  SQuery* pQuery = pRuntimeEnv->pQuery;

1568
  if (isSelectivityWithTagsQuery(pQuery)) {
1569
    int32_t num = 0;
1570
    int16_t tagLen = 0;
1571

1572
    SQLFunctionCtx *p = NULL;
1573
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1574 1575 1576
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1577

1578
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1579
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1580

1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1594 1595 1596 1597 1598
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
S
TD-1848  
Shengliang Guan 已提交
1599
      tfree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1600
    }
1601
  }
H
Haojun Liao 已提交
1602 1603

  return TSDB_CODE_SUCCESS;
1604 1605
}

1606
/*
 * Allocate and initialize the per-output-column function contexts of the runtime environment.
 *
 * For every output expression the input type/width is resolved (tag column, user-defined
 * constant column or normal column), the output type/width and function id are copied from
 * the select expression, the function parameters are converted into variants, and the running
 * offsets (output row offset and result-row cell-info offset) are accumulated.
 *
 * pRuntimeEnv: runtime environment to set up; pRuntimeEnv->pQuery must already be assigned
 * order:       value stored in param[2] of top/bottom/diff contexts
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if any allocation (including the
 * one made by setCtxTagColumnInfo) fails. On failure the context array is released together
 * with the parameter variants created here; rowCellInfoOffset is left in place and is freed
 * by teardownQueryRuntimeEnv().
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));
  pRuntimeEnv->pResultRow = getNewResultRow(pRuntimeEnv->pool);

  if (pRuntimeEnv->pResultRow == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex      *pIndex = &pSqlFuncMsg->colInfo;

    // the "require null" request travels in the column flag; record it and strip the bit
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type and width according to the kind of source column
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;
    pCtx->stableQuery = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pSelectExpr[i].interBytes;

    // convert the function arguments into variants; binary/nchar arguments are passed by pointer,
    // all other types are read from the in-place 64-bit value
    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions rely on the first output column being the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // running offsets: each column starts where the previous one ends
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pSelectExpr[i - 1].interBytes;
    }
  }

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // The parameter variants are owned by the contexts; destroy them before the array goes away,
  // otherwise nothing can reach them any more. Contexts that were never initialized still have
  // numOfParams == 0 because the array comes from calloc.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726
/*
 * Release both tsdb query handles held by the runtime environment of pQInfo and reset the
 * handle pointers. Asserts that the memory reference of the query holds nothing any more.
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;

  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  pEnv->pQueryHandle = NULL;

  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  pEnv->pSecQueryHandle = NULL;

  SMemRef *pRef = &pQInfo->memRef;
  assert(pRef->ref == 0 && pRef->imem == NULL && pRef->mem == NULL);
}

1727 1728 1729 1730
/*
 * Release everything owned by the runtime environment: time window info, function contexts
 * (with their parameter/tag variants and tag context lists), fill info, result buffer, query
 * handles, ts buffer, key buffer, cell-info offsets, the result-row hash table and the
 * result-row pool.
 *
 * Does nothing when no query has been attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[col];

      for (int32_t k = 0; k < pFuncCtx->numOfParams; ++k) {
        tVariantDestroy(&pFuncCtx->param[k]);
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);

  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

H
Haojun Liao 已提交
1768
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1769

H
Haojun Liao 已提交
1770
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;}
H
hjxilinx 已提交
1771

H
Haojun Liao 已提交
1772 1773 1774
/*
 * Tell whether the query is a fixed output query.
 *
 * Interval queries never are. Top/bottom queries and group-by-normal-column queries always
 * are. Otherwise the query is fixed output as soon as one relevant output function is not a
 * multi-output function; timestamp columns are skipped while looking for one.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[col].base;

    // ignore the ts_comp function: a leading projection of the primary timestamp column
    bool leadingTsProjection = (col == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    if (leadingTsProjection) {
      continue;
    }

    bool isTsColumn = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (isTsColumn) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1804
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1805
static bool isPointInterpoQuery(SQuery *pQuery) {
1806
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1807
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1808
    if (functionID == TSDB_FUNC_INTERP) {
1809 1810 1811
      return true;
    }
  }
1812

1813 1814 1815 1816
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1817
static bool isSumAvgRateQuery(SQuery *pQuery) {
1818
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1819
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1820 1821 1822
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1823

1824 1825 1826 1827 1828
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1829

1830 1831 1832
  return false;
}

H
hjxilinx 已提交
1833
/* Returns true when at least one output column of the query uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }
    ++col;
  }

  return false;
}

H
hjxilinx 已提交
1844
/*
 * Decide whether the data must additionally be scanned in the reverse direction.
 *
 * That is the case when a first()/first_dst() function runs in a query that is not ascending,
 * or when a last()/last_dst() function carries (in its first argument) an order that differs
 * from the order of the query. Timestamp and tag columns are ignored.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    switch (0) {
      default:
        break;
    }

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (!isLast) {
      continue;
    }

    // the scan order to acquire the last result of the specified column
    int32_t requiredOrder = (int32_t)pQuery->pSelectExpr[col].base.arg->argValue.i64;
    if (requiredOrder != pQuery->order.order) {
      return true;
    }
  }

  return false;
}
H
hjxilinx 已提交
1866

H
Haojun Liao 已提交
1867 1868 1869 1870
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1871 1872
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1873 1874 1875
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1876 1877 1878 1879

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1880
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1881 1882 1883
      return false;
    }
  }
1884

H
hjxilinx 已提交
1885 1886 1887
  return true;
}

1888 1889
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1890
/*
 * Compute the interval-aligned time window that contains key.
 *
 * pQuery:            query description, supplies interval, sliding, unit and precision
 * key:               timestamp to locate, must lie within [keyFirst, keyLast]
 * keyFirst, keyLast: lower/upper bound of the query range
 * win:               output; skey is key truncated by taosTimeTruncate(), ekey is the last
 *                    timestamp of that window
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * if the realSkey > INT64_MAX - pQuery->interval.interval, the query duration between
   * realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  char unit = pQuery->interval.intervalUnit;
  if (unit == 'n' || unit == 'y') {
    // 'n' / 'y' units are not added as a plain offset; let taosTimeAdd() do the arithmetic
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column queries. For all
 * other queries it is 1 only when there is at least one output function besides the
 * timestamp columns and every such function is a multi-output function.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    // a single non multi-output function settles the answer
    allMultiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!allMultiOutput) {
      break;
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1934
bool colIdCheck(SQuery *pQuery) {
1935 1936
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1937
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1938
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1939 1940 1941
      return false;
    }
  }
1942

1943 1944 1945 1946 1947 1948
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
/*
 * Returns true when every output function - timestamp and tag columns aside - is either
 * functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    bool isTsOrTag = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                     (fid == TSDB_FUNC_TAG_DUMMY);
    if (isTsOrTag) {
      continue;
    }

    bool matches = (fid == functId) || (fid == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

/* True when first()/first_dst() are the only functions of the query besides ts/tag columns. */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* True when last()/last_dst() are the only functions of the query besides ts/tag columns. */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1969
// todo refactor, add iterator
1970 1971
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1972
  for(int32_t i = 0; i < t; ++i) {
1973
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1974 1975 1976

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1977
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1978

1979 1980 1981 1982
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1983 1984 1985 1986
    }
  }
}

1987
/*
 * Adjust the scan order (and, where needed, swap the bounds of the query window) so that the
 * query is evaluated in the direction that suits its functions:
 *
 *  - last_row query:                     ascending
 *  - group by normal column, descending: ascending
 *  - point interpolation, no interval:   ascending
 *  - only first()/first_dst():           ascending
 *  - only last()/last_dst():             descending
 *
 * The first/last rules apply to every non-interval query, and to interval queries only when
 * stableQuery is set. Whenever the window bounds are swapped for the group-by and first/last
 * rules, the lastKey of the tables is updated through doExchangeTimeWindow().
 *
 * pQInfo:      query whose order/window is adjusted
 * pQueryMsg:   not used by this function
 * stableQuery: whether the query runs against a super table
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // in case of point-interpolation query, use asc order scan
  static const char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
                            "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // NOTE: the log statements below print pQInfo directly. GET_QINFO_ADDR() is only meaningful for
  // the address of the runtime environment embedded in SQInfo, not for the SQuery pointer.
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Pick the initial number of result pages for the query:
 *  - 128 for a group-by-normal-column query
 *  - one page per table, but at least 16, for an interval (time window) query
 *  - 1 for everything else
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t numOfPages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t s = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = (int32_t)(MAX(s, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(numOfPages > 0);
  return numOfPages;
}

2097 2098
/*
 * Work out the geometry of the intermediate result buffer.
 *
 * pRuntimeEnv: runtime environment; numOfRowsPerPage is stored into it
 * ps:          output, page size in bytes. Starts at DEFAULT_INTERN_BUF_PAGE_SIZE and is
 *              doubled until a page (minus the tFilePage header) holds MIN_ROWS_PER_PAGE rows
 * rowsize:     output, size of one intermediate row in bytes
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  int32_t MIN_ROWS_PER_PAGE = 4;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  int32_t headerBytes = sizeof(tFilePage);
  int32_t requiredBytes = (*rowsize) * MIN_ROWS_PER_PAGE;

  // one page contains at least MIN_ROWS_PER_PAGE rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; requiredBytes > (*ps) - headerBytes; *ps = (*ps << 1u)) {
    // keep doubling
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2114
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2115

H
Haojun Liao 已提交
2116 2117 2118 2119
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
2120 2121 2122 2123 2124
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
2125

H
Haojun Liao 已提交
2126 2127 2128 2129 2130 2131 2132 2133
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

2134
    // no statistics data, load the true data block
H
Haojun Liao 已提交
2135
    if (index == -1) {
H
Haojun Liao 已提交
2136
      return true;
2137
    }
2138

2139
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
2140
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
H
Haojun Liao 已提交
2141
      return true;
2142
    }
2143

2144
    // all data in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
2145
    if (pDataStatis[index].numOfNull == numOfRows) {
2146 2147 2148 2149 2150 2151 2152 2153 2154

      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
        if (pFilterElem->fp == isNull_filter) {
          return true;
        }
      }

2155 2156
      continue;
    }
2157

H
Haojun Liao 已提交
2158 2159 2160
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
S
TD-1057  
Shengliang Guan 已提交
2161 2162
      float minval = (float)(*(double *)(&pDataBlockst->min));
      float maxval = (float)(*(double *)(&pDataBlockst->max));
2163

2164 2165 2166 2167 2168 2169 2170
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
2171
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
2172 2173 2174 2175 2176
          return true;
        }
      }
    }
  }
2177

H
Haojun Liao 已提交
2178 2179 2180 2181 2182 2183 2184 2185
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
2186

H
Haojun Liao 已提交
2187
  return false;
2188 2189
}

H
Haojun Liao 已提交
2190 2191 2192 2193 2194 2195 2196 2197
/*
 * Check whether the data block is split by the boundary of a time window of the interval
 * query, i.e. whether it is NOT completely covered by one single window.
 *
 * Starting from the aligned window that holds the first key of the block in scan direction
 * (block skey for ascending, block ekey for descending queries), the windows are advanced
 * with GET_NEXT_TIMEWINDOW() until they leave the block. Returns true as soon as a window
 * boundary is found inside the block range, false otherwise.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow win = {0};

  TSKEY rangeStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY rangeEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    // descending scan: begin with the window that contains the last key of the block
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, rangeStart, rangeEnd, &win);
    assert(win.skey <= pBlockInfo->window.ekey);

    if (win.skey > pBlockInfo->window.skey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &win);
      if (win.ekey < pBlockInfo->window.skey) {
        return false;
      }

      assert(win.skey < pBlockInfo->window.skey);
      if (win.ekey < pBlockInfo->window.ekey && win.ekey >= pBlockInfo->window.skey) {
        return true;
      }
    }
  }

  // ascending scan: begin with the window that contains the first key of the block
  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, rangeStart, rangeEnd, &win);
  assert(win.ekey >= pBlockInfo->window.skey);

  if (win.ekey < pBlockInfo->window.ekey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &win);
    if (win.skey > pBlockInfo->window.ekey) {
      return false;
    }

    assert(win.ekey > pBlockInfo->window.ekey);
    if (win.skey <= pBlockInfo->window.ekey && win.skey > pBlockInfo->window.skey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2239
/*
 * Load as little of the current data block as the query needs.
 *
 * pRuntimeEnv:    runtime environment (query, function contexts, summary counters)
 * pWindowResInfo: window result info, used to select the output buffer for interval queries
 * pQueryHandle:   tsdb query handle positioned on the block
 * pBlockInfo:     description of the block (time range, number of rows)
 * pStatis:        output, block statistics when they have been retrieved
 * pDataBlock:     output, column data when the block has been loaded
 * status:         output, one of BLK_DATA_NO_NEEDED, BLK_DATA_STATIS_NEEDED,
 *                 BLK_DATA_ALL_NEEDED or BLK_DATA_DISCARD
 *
 * The whole block is required when column filters or a ts buffer exist, or when an interval
 * query has a window boundary inside the block; otherwise each function is asked through its
 * dataReqFunc what it needs. With BLK_DATA_ALL_NEEDED the statistics are consulted first and
 * may turn the status into BLK_DATA_DISCARD.
 *
 * Returns TSDB_CODE_SUCCESS, or terrno when the data block cannot be retrieved in the
 * BLK_DATA_ALL_NEEDED path.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  // pTSBuf is a pointer: test it against NULL instead of using a relational comparison with 0
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every function; stop once the whole block is needed anyway
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    // NOTE(review): the block is still retrieved and counted below when it has just been marked
    // BLK_DATA_DISCARD - confirm whether callers rely on that before short-cutting here.
    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2324
/*
 * Binary search in an ascending array of timestamps.
 *
 * pValue: the array, interpreted as num entries of TSKEY
 * num:    number of entries
 * key:    timestamp to look up
 * order:  TSDB_ORDER_ASC or TSDB_ORDER_DESC
 *
 * TSDB_ORDER_DESC: returns the position of an entry equal to key if one is found, otherwise
 *                  the position of the closest smaller entry (which is -1 when key is smaller
 *                  than every entry).
 * TSDB_ORDER_ASC:  returns the position of an entry equal to key if one is found, otherwise
 *                  the position of the closest larger entry, or -1 when key is larger than
 *                  every entry.
 *
 * Returns -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }
      if (key == keys[lo]) {
        return lo;
      }
      if (key < keys[lo]) {
        return lo - 1;
      }

      // here keys[lo] < key < keys[hi]; probe the upper middle element
      int32_t mid = lo + ((hi - lo + 1) >> 1);
      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }
    if (key == keys[hi]) {
      return hi;
    }
    if (key > keys[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    // here keys[lo] < key < keys[hi]; probe the upper middle element
    int32_t mid = lo + ((hi - lo + 1) >> 1);
    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399
/*
 * Resize every output column buffer so that it can hold `capacity` rows and point each
 * function context at the start of its (possibly moved) buffer.
 *
 * Nothing is done when `capacity` is smaller than the current capacity.
 * On allocation failure the function does not return: it longjmps to pRuntimeEnv->env
 * with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // widen before multiplying: bytes * capacity evaluated in int32_t may overflow
    size_t newBytes = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    // keep the old pointer until realloc succeeds, so it is never lost on failure
    char *tmp = realloc(pQuery->sdata[i], newBytes);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2415 2416 2417
/*
 * Make sure the output buffers have room for all rows of the data block described by
 * pBlockInfo. Only applies when the query is not an interval query, has no group-by on a
 * normal column, is not a fixed-output query and is not a ts-comp query.
 *
 * When the free room (capacity - rows) is smaller than the block, every column buffer is
 * grown by the missing number of rows, the newly available area is zero-filled, and the
 * function contexts are re-pointed behind the rows already produced.
 * On allocation failure the function longjmps to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room already
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // widen before multiplying: bytes * newSize evaluated in int32_t may overflow
    size_t newBytes = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newBytes);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that have already been produced
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // these functions take their timestamp output position from the first output column
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474
/*
 * For an interval query whose window info has not been initialised yet
 * (prevSKey == TSKEY_INITIAL_VAL), derive the first aligned time window from the given
 * data block and store its start in windowResInfo.startTime / prevSKey.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo* pInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;

  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);

    pInfo->startTime = pQuery->window.skey;
    pInfo->prevSKey = win.skey;
    return;
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
  pInfo->startTime = win.skey;
  pInfo->prevSKey = win.skey;
}

2475 2476
/*
 * Iterate over the data blocks delivered by the active query handle (primary handle for
 * the master scan, secondary handle otherwise) and apply the query functions on each one.
 *
 * The loop stops when no block is left, when loading a block fails, or when the query
 * status becomes QUERY_RESBUF_FULL / QUERY_COMPLETED. A killed query or a non-zero terrno
 * leaves the function through longjmp on pRuntimeEnv->env. Always returns 0 otherwise.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  STableQueryInfo *pCurrent = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         pQInfo, pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT handle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  } else {
    handle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, handle, &blockInfo, &pStatis,
                                         &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // block skipped: move the last key just beyond the block in scan direction
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, pQInfo,
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2555
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2556
  tVariantDestroy(tag);
2557

2558
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2559
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2560
    assert(val != NULL);
2561

H
[td-90]  
Haojun Liao 已提交
2562
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2563
  } else {
2564
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2565 2566 2567 2568
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2569

H
hjxilinx 已提交
2570
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2571
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2572 2573 2574 2575
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2576
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2577
    } else {
H
Haojun Liao 已提交
2578 2579 2580 2581 2582
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2583
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2584
    }
2585
  }
2586 2587
}

2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599
/*
 * Linear lookup of a tag column by column id.
 * Returns a pointer into pTagColList, or NULL when no entry carries colId.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo* pEnd = pTagColList + numOfTags;
  for (SColumnInfo* pCol = pTagColList; pCol < pEnd; ++pCol) {
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2600
/*
 * Load the tag values of pTable into the function contexts of the running query.
 *
 * - A query whose single output is TSDB_FUNC_TS_COMP only needs the tag named by its
 *   first argument, stored in pCtx[0].tag.
 * - Otherwise every output expression flagged as a tag column gets its tag value, and,
 *   when a ts buffer exists and the first expression is TS/PRJ on the primary timestamp
 *   column, the join tag named by its first argument is stored in pCtx[0].tag.
 *
 * NOTE(review): the result of doGetTagColumnInfoById is dereferenced without a NULL check.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t      tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[idx];

    // only tag columns carry a tag value; todo use tag column index to optimize performance
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag, pExpr->type,
                           pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pExprInfo->base;

  bool isTsOrPrj = (pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ);
  if (!isTsOrPrj || pRuntimeEnv->pTSBuf == NULL || pFuncMsg->colInfo.colIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      joinTagId = (int16_t)pExprInfo->base.arg->argValue.i64;
  SColumnInfo *pJoinCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinTagId);

  doSetTagValueInParam(tsdb, pTable, joinTagId, &pRuntimeEnv->pCtx[0].tag, pJoinCol->type, pJoinCol->bytes);

  int16_t tagType = pRuntimeEnv->pCtx[0].tag.nType;
  if (tagType != TSDB_DATA_TYPE_BINARY && tagType != TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pExprInfo->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64Key);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pExprInfo->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz);
  }
}

H
Haojun Liao 已提交
2650
/*
 * Feed one result row (pWindowRes) into the merge functions of all output columns.
 *
 * mergeFlag == false starts a new output row: each context's output position is advanced
 * by one cell, its result info is reset and the function is re-initialised.
 * mergeFlag == true merges into the output row that is currently active.
 *
 * All contexts are prepared in a first pass; the distMergeFunc callbacks run in a second
 * pass (TSDB_FUNC_TAG_DUMMY is skipped there).
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SResultRow *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  tFilePage      *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  // pass 1: position the input/output buffers of every column
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf = pc->aOutputBuf + pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[fid].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes, pPage);

    if (fid != TSDB_FUNC_TAG_DUMMY && fid != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pc->tag);

    int32_t outType = pc->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // pass 2: run the merge function of every column except dummy tags
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (fid != TSDB_FUNC_TAG_DUMMY) {
      aAggs[fid].distMergeFunc(&pCtx[col]);
    }
  }
}

2694
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2763
/*
 * Debug helper: dump `numOfRows` rows of the column-wise intermediate result `pdata`
 * to stdout, one line per row. Columns whose output type is not handled below are
 * silently skipped.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo* pExpr = &pQuery->pSelectExpr[col];

      // address of this row's cell inside the column buffer
      char* cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY:
          printBinaryData(pExpr->base.functionId, cell, pExpr->type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2798 2799 2800
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2801 2802 2803 2804 2805
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2806

2807 2808
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2809

2810 2811
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2812

2813 2814 2815 2816
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2817

2818 2819 2820 2821
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2822

H
hjxilinx 已提交
2823
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2824
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
2825
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId);
2826

H
Haojun Liao 已提交
2827
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2828
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2829

H
hjxilinx 已提交
2830
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2831
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
2832
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId);
2833

H
Haojun Liao 已提交
2834
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2835
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2836

2837 2838 2839
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2840

2841 2842 2843
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2844
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group yields at least one
 * result page or all groups are consumed. The elapsed time is added to
 * summary.firstStageMergeTime.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (;;) {
    if (pQInfo->groupIndex >= numOfGroups) {
      break;
    }

    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t code = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (code > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  // every group merged and every page handed out: the super table query is finished
  SGroupResInfo *pResInfo = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pResInfo->pageId == pResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the disk-based result buffer into the query output
 * buffers (pQuery->sdata), at most pQuery->rec.capacity rows per call.
 *
 * The read position (pageId, rowId) is kept in pQInfo->groupResInfo so that a page that
 * did not fit completely is continued by the next call. When the pages of the current
 * group are used up, the next group is merged first; if nothing is left the query is
 * marked as over and no rows are produced.
 *
 * On return pQuery->rec.rows holds the number of rows copied (it must be 0 on entry).
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // rows already written into the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->rowId < pData->num);

    // remember where this page is resumed before the position is advanced below
    int32_t startRow = (int32_t)pGroupResInfo->rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer gets full: take what fits and continue with this page next time
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->rowId += numOfCopiedRows;
      done = true;
    } else {
      // only the rows not yet delivered remain on this page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pageId += 1;
      pGroupResInfo->rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage; skip the rows delivered earlier
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + (size_t)startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

2952 2953 2954
/*
 * Number of results held by a result row: the numOfRes of the first output column that
 * is not a ts/tag/tagprj function and has a positive count, or 0 when there is none.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool decidesCount = (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG && fid != TSDB_FUNC_TAGPRJ);
    if (decidesCount) {
      SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResultRow, col);
      assert(pCell != NULL);

      if (pCell->numOfRes > 0) {
        return pCell->numOfRes;
      }
    }
  }

  return 0;
}

2977
/*
 * Merge the per-table results of one table group into result pages of the group.
 *
 * Tables of pGroup that own result pages and at least one window result take part.
 *   - no such table : returns 0
 *   - one table     : its pages are used directly, no merge is performed
 *   - several tables: rows are merged by timestamp through a loser tree; rows with equal
 *                     timestamps are combined, the output is flushed page-wise
 *
 * Returns the number of data pages recorded in pQInfo->groupResInfo, or -1 when flushing
 * fails. Allocation failure and a killed query leave through longjmp on pRuntimeEnv->env.
 * All temporary allocations are released on every exit path.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that actually produced results
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    tfree(posList);
    tfree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {
    // the tree is dereferenced right below; treat a missing tree like the other allocation failures
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  SResultRow* pRow = getNewResultRow(pRuntimeEnv->pool);
  resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      tfree(pTableList);
      tfree(posList);
      tfree(pTree);
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the tree root names the source to consume next
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SResultRow  *pWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num <= 0) {
      // empty window: just step over it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

            // release the merge state before bailing out
            tfree(pTree);
            tfree(pTableList);
            tfree(posList);
            return -1;
          }

          resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SResultRow  *pNextWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      tfree(pTree);
      tfree(pTableList);
      tfree(posList);
      return -1;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  tfree(pTableList);
  tfree(posList);
  tfree(pTree);

  return pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3146 3147
/*
 * Copy the rows currently held in the query output columns (pQuery->sdata)
 * into newly requested pages of pRuntimeEnv->pResultBuf.
 *
 * Rows are moved in chunks of at most pResultBuf->numOfRowsPerPage. Each chunk
 * is written to a page obtained from getNewDataBuf() for
 * pGroupResInfo->groupId, and pGroupResInfo->numOfDataPages is incremented
 * once per page written.
 *
 * @param pRuntimeEnv    runtime environment providing the query, ctx array and result buffer
 * @param pGroupResInfo  supplies the group id and receives the page count
 * @return TSDB_CODE_SUCCESS; this function reports no failure path
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;
  int32_t rowsPerPage = pResultBuf->numOfRowsPerPage;
  int32_t total = (int32_t) pQuery->sdata[0]->num;

  int32_t copied = 0;  // number of rows already written to result pages
  while (copied < total) {
    int32_t pending = total - copied;
    int32_t rows = (pending > rowsPerPage)? rowsPerPage:pending;
    assert(rows > 0);

    // NOTE(review): the page is used without a NULL check -- confirm getNewDataBuf cannot fail here
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    buf->num = rows;

    // column by column, copy this chunk into the column's region of the page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = ((char *) pQuery->sdata[col]->data) + copied * bytes;
      char *dst = buf->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;
      memcpy(dst, src, (size_t)(buf->num * bytes));
    }

    copied += rows;
    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3184 3185 3186
/*
 * Rewind the merge output state for every output column.
 *
 * For each column the row counter of pQuery->sdata[k] is cleared, the ctx is
 * re-bound to the matching result cell of pRow, and the ctx output pointer is
 * placed one output slot *before* the start of the column data.
 * NOTE(review): presumably the merge step advances the pointer before writing
 * the first row -- confirm against doMerge.
 */
void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];

    pc->aOutputBuf = pQuery->sdata[col]->data - pc->outputBytes;
    pc->startOffset = 0;
    pc->size = 1;
    pc->resultInfo = getResultCell(pRuntimeEnv, pRow, col);

    pQuery->sdata[col]->num = 0;
  }
}

3197 3198 3199 3200
/*
 * Flip the per-table scan state so the table can be scanned in the opposite
 * direction.
 *
 * The caller is expected to have switched pQuery->order.order already, so
 * `step` below is the direction factor of the new order. A NULL
 * pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO: validate that win.ekey and lastKey + step are ordered consistently with the scan direction

  // lastKey still equal to win.skey means the previous scan produced nothing: keep ekey as is
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  // the reverse scan runs over the same window with its two ends exchanged
  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
  pWindowResInfo->curIndex = pWindowResInfo->size - 1;
}

3228 3229
/*
 * For every closed window result, decide per output column whether the
 * function still has work to do in the reverse scan.
 *
 * first/first_dst under TSDB_ORDER_ASC and last/last_dst under
 * TSDB_ORDER_DESC are marked not complete. Every other function except ts and
 * tag is marked complete. ts and tag cells are left untouched.
 */
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    // windows that are not closed yet are skipped
    if (!getTimeWindowResStatus(pWindowResInfo, i)) {
      continue;
    }

    SResultRow *pRow = getResultRow(pWindowResInfo, i);

    // open/close the specified query for each group result
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functId = pQuery->pSelectExpr[j].base.functionId;
      SResultRowCellInfo* pInfo = getResultCell(pRuntimeEnv, pRow, j);

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pInfo->complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pInfo->complete = true;
      }
    }
  }
}

3254 3255
/*
 * Mark which output functions must keep running during the reverse scan.
 *
 * Group-by-normal-column and interval queries delegate to
 * disableFuncInReverseScanImpl() on the runtime window results. All other
 * queries apply the same rule directly to the result info of each ctx:
 * first/first_dst (ASC) and last/last_dst (DESC) become not complete, every
 * other function except ts/tag becomes complete.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pRuntimeEnv, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple table query: one result cell per ctx (todo refactor, duplicates the Impl logic)
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functId = pQuery->pSelectExpr[j].base.functionId;

    SResultRowCellInfo *pResInfo = pRuntimeEnv->pCtx[j].resultInfo;
    if (pResInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pResInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pResInfo->complete = true;
    }
  }
}

/*
 * Prepare every table of every table group for the reverse scan.
 *
 * Each STableQueryInfo is flipped via updateTableQueryInfoForReverseScan(),
 * and the resulting lastKey is copied into the STableKeyInfo at the same
 * position of pQInfo->tableGroupInfo.pGroupList.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *group = GET_TABLEGROUP(pQInfo, g);
    SArray *tableKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(group);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(group, k);
      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pTableKeyInfo = taosArrayGet(tableKeyGroup, k);
      pTableKeyInfo->lastKey = pCheckInfo->lastKey;

      // both lists must describe the same table at the same index
      assert(pCheckInfo->pTable == pTableKeyInfo->pTable);
    }
  }
}

3305
/* Apply SWITCH_ORDER to the `order` field of every output function ctx. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t k = 0; k < numOfOutput; ++k) {
    SWITCH_ORDER(pCtx[k].order);
  }
}

3312
/*
 * Initialize a result row: pageId and rowId are set to -1, and pCellInfo is
 * pointed at the memory immediately following the SResultRow header.
 *
 * @return TSDB_CODE_SUCCESS always
 */
int32_t initResultRow(SResultRow *pResultRow) {
  pResultRow->rowId = -1;
  pResultRow->pageId = -1;
  pResultRow->pCellInfo = (SResultRowCellInfo*)((char*)pResultRow + sizeof(*pResultRow));

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every ctx back at the start of its output column in pQuery->sdata,
 * reset and re-bind the result cells of pRuntimeEnv->pResultRow, zero the
 * output columns, and finally run initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery     *pQuery = pRuntimeEnv->pQuery;
  SResultRow *pRow = pRuntimeEnv->pResultRow;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    SResultRowCellInfo* pCellInfo = getResultCell(pRuntimeEnv, pRow, col);
    RESET_RESULT_INFO(pCellInfo);
    pCtx->resultInfo = pCellInfo;

    // top/bottom/diff write their timestamps through the output buffer of the first column
    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of every ctx by `output` rows and reset its
 * result info.
 *
 * Only functions whose status has IS_OUTER_FORWARD set get their aOutputBuf
 * moved. top/bottom additionally move ptsOutputBuf by TSDB_KEYSIZE per row.
 * The diff function must not appear here (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[k];

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the values of the first output column (timestamp) are assigned
     * inside the top/bottom routine through ptsOutputBuf, so that buffer has to be forwarded too.
     *
     * diff function is handled in multi-output function
     */
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every ctx to 0 and invoke the aggregate's init()
 * callback for each ctx whose result info is not yet initialized.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[k];
    int32_t         functionId = pQuery->pSelectExpr[k].base.functionId;

    pCtx->currentStage = 0;

    // already initialized result cells are left as they are
    SResultRowCellInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3391
/*
 * Apply the pending query offset (pQuery->limit.offset) to the rows currently
 * held in the output buffer.
 *
 *  - nothing to do when there are no rows or no offset;
 *  - when all rows fall inside the offset, they are dropped, the offset is
 *    reduced accordingly, the ctx output buffers are reset and the
 *    QUERY_RESBUF_FULL flag is cleared;
 *  - otherwise the first `offset` rows are discarded by shifting the
 *    remaining rows to the front of each output column.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // every buffered row is consumed by the offset
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only the leading part of the buffer is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    int32_t bytes = pCtx->outputBytes;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[col]->data, (char*)pQuery->sdata[col]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pCtx->aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update pQuery->status.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed in
 * after the QUERY_NOT_COMPLETED flag has been cleared, because that flag is
 * not compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Call xNextStep() on every output function (ts columns excepted) and report
 * whether at least one of them is still not complete afterwards.
 *
 * For group-by-normal-column and interval queries this is done once per
 * closed window result, after binding the ctx array to that result. For other
 * queries it is done once on the current ctx array.
 *
 * @return true when any visited function's result info is not complete
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    toContinue = false;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

    for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
      SResultRow *pResult = getResultRow(pWindowResInfo, i);
      if (!pResult->closed) {
        continue;
      }

      setResultOutputBuf(pRuntimeEnv, pResult);

      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        int16_t functId = pQuery->pSelectExpr[j].base.functionId;
        if (functId != TSDB_FUNC_TS) {
          SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];

          aAggs[functId].xNextStep(pCtx);
          if (!GET_RES_INFO(pCtx)->complete) {
            toContinue = true;
          }
        }
      }
    }

    return toContinue;
  }

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int16_t functId = pQuery->pSelectExpr[j].base.functionId;
    if (functId != TSDB_FUNC_TS) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];

      aAggs[functId].xNextStep(pCtx);
      if (!GET_RES_INFO(pCtx)->complete) {
        toContinue = true;
      }
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3487
/*
 * Capture the state needed to restore the query after repeat/reverse scans:
 * the query status, the current window index, the query window, and the
 * current table's window with its start key replaced by `start`.
 *
 * `start` must not lie beyond the table's lastKey in scan direction
 * (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pTableQueryInfo->lastKey) : (start >= pTableQueryInfo->lastKey));

  SQueryStatusInfo info = {
      .lastKey     = start,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .status      = pQuery->status,
  };

  TIME_WINDOW_COPY(info.w, pQuery->window);

  TIME_WINDOW_COPY(info.curWindow, pTableQueryInfo->win);
  info.curWindow.skey = start;

  return info;
}

3508 3509 3510 3511
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * The ts-buffer cursor is saved into pStatus->cur, the query window is taken
 * from pStatus->curWindow with its ends exchanged, the scan order of the
 * query and of every ctx is switched, and a new secondary tsdb query handle
 * is opened for the reversed window (any previous one is cleaned up first).
 *
 * Does not return normally when the handle cannot be created: it longjmps to
 * pRuntimeEnv->env with terrno.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch the window must be consistent with the new direction
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3557 3558
/*
 * Undo setEnvBeforeReverseScan(): switch the query/ctx order back, restore
 * the saved ts-buffer cursor, return to master-scan mode and restore the
 * status, lastKey and time windows recorded in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the scan position and status captured before the reverse scan
  pQuery->status = pStatus->status;
  pTableQueryInfo->lastKey = pStatus->lastKey;

  // both the table window and the query window go back to the saved query window
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

H
Haojun Liao 已提交
3579 3580 3581 3582 3583 3584 3585
/*
 * Reset lastKey of the first table key info in the first group to the start
 * key of the given query condition. Only valid for a single-table group info
 * (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray*        pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3586
/*
 * Scan the data blocks of the current table starting from `start`.
 *
 * The master scan is followed by repeat scans for as long as
 * needScanDataBlocksAgain() reports unfinished functions. Each repeat scan
 * opens a fresh secondary tsdb handle over the window covered by the master
 * scan. Afterwards, if needReverseScan() says so, one reverse scan is run and
 * the environment is restored.
 *
 * Leaves via longjmp(pRuntimeEnv->env, ...) when a tsdb handle cannot be
 * created (terrno) or the query has been killed
 * (TSDB_CODE_TSC_QUERY_CANCELLED).
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (qstatus.lastKey != pTableQueryInfo->lastKey) {
        qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    // another pass is required: rebuild the secondary handle over the recorded window
    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3666
/*
 * Run the xFinalize() callback of every output function.
 *
 * For group-by-normal-column and interval queries this happens once per
 * closed window result, and the row count of that result is recorded. For
 * group-by-normal-column queries all windows are closed first. Other queries
 * finalize the current ctx array once.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SResultRow *pRow = pWindowResInfo->pResult[i];
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * @return true when at least one output expression is something other than
 *         a ts, tag or tagprj function; false otherwise (including when the
 *         query has no output expressions)
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

    bool auxiliary =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3714
/*
 * Initialize a STableQueryInfo in the caller-provided memory `buf`.
 *
 * The window, lastKey (= win.skey), table pointer and cursor vgroup index
 * (-1) are set. For interval and group-by-normal-column queries the window
 * result info is initialized as well. Other aggregate queries leave it
 * untouched.
 *
 * @return `buf` as STableQueryInfo*, or NULL when initWindowResInfo() fails
 *         (the memory behind `buf` is not released here)
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // only interval/groupby queries need a window result set
  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol)) {
    return pInfo;
  }

  int32_t initialSize = 16;
  int32_t initialThreshold = 100;

  int32_t code = initWindowResInfo(&pInfo->windowResInfo, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  return (code == TSDB_CODE_SUCCESS)? pInfo : NULL;
}

H
Haojun Liao 已提交
3739
/*
 * Release the resources held inside a STableQueryInfo: its tag variant and
 * its window result info. The structure itself is not freed. NULL is ignored.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3751
 * @param pDataBlockInfo
3752
 */
H
Haojun Liao 已提交
3753
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3754
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3755 3756 3757
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3758 3759
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3760 3761 3762 3763

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3764

H
Haojun Liao 已提交
3765 3766 3767
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3768

H
Haojun Liao 已提交
3769
  uint64_t uid = 0; // uid is always set to be 0
H
Haojun Liao 已提交
3770
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
3771
      sizeof(groupIndex), true, uid);
H
Haojun Liao 已提交
3772
  if (pResultRow == NULL) {
3773 3774
    return;
  }
3775

3776 3777 3778 3779
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
3780 3781
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3782 3783 3784 3785
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3786

H
Haojun Liao 已提交
3787 3788
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
3789
  setResultOutputBuf(pRuntimeEnv, pResultRow);
3790 3791 3792
  initCtxOutputBuf(pRuntimeEnv);
}

3793
/*
 * Bind every ctx of the runtime environment to the given result row: the
 * output pointer is set to the row's position inside its result buffer page,
 * and resultInfo to the matching result cell. top/bottom/diff additionally
 * get their timestamp output pointed at the output buffer of ctx 0.
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, page);

    bool needTsOutput =
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer,
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);
  }
}

H
Haojun Liao 已提交
3816
/*
 * Bind the ctx array to the given window result row and initialize the
 * functions that have not been initialized yet.
 *
 * A ctx whose result cell is both initialized and complete only gets its
 * resultInfo re-bound. Its output pointers and stage are left unchanged.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* bufPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx     *pCtx = &pRuntimeEnv->pCtx[col];
    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, col);

    pCtx->resultInfo = pCell;
    if (pCell->initialized && pCell->complete) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, bufPage);
    pCtx->currentStage = 0;

    int32_t functionId = pCtx->functionId;
    switch (functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        // these functions emit timestamps through the output buffer of the first column
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3844
/*
 * Set the tag values for the given table and, when a ts-comp buffer is in
 * use, position that buffer for the table.
 *
 * On the first visit of a table (cursor vgroupIndex == -1) the tag of ctx 0
 * is copied into pTableQueryInfo->tag, the start position for that tag and
 * pQInfo->vgId is looked up in the ts buffer, and the resulting cursor is
 * remembered in pTableQueryInfo->cur. On later visits the remembered cursor
 * is simply restored.
 *
 * @param pQInfo           query info owning the runtime environment
 * @param pTable           table whose tag values are loaded
 * @param pTableQueryInfo  per-table state holding the tag copy and ts-buffer cursor
 * @return 0 on success; -1 when the ts buffer holds no data for the tag.
 *         (The failure path used to `return false`, i.e. 0, which made it
 *         indistinguishable from success.)
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      // report the failure with a non-zero code so callers can tell it apart from success
      return -1;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
    if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
      qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
    } else {
      qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
    }
  } else {
    // the table has been visited before: resume from its remembered position
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);

    if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
      qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
    } else {
      qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
    }
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3900
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
  SQuery *         pQuery = pQInfo->runtimeEnv.pQuery;
  STableQueryInfo *pInfo = pQuery->current;

  // Range already established for this table: only the progress key moves.
  if (pInfo->queryRangeSet) {
    pInfo->lastKey = key;
    return;
  }

  pInfo->win.skey = key;
  STimeWindow range = {.skey = key, .ekey = pQuery->window.ekey};

  // The query window is empty for the scan direction, so no data falls in this interval.
  bool emptyRange = QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.ekey < pQuery->window.skey)
                                               : (pQuery->window.skey < pQuery->window.ekey);
  if (emptyRange) {
    return;
  }

  /*
   * For both ascending and descending super table queries the first qualified timestamp of this
   * table has to be located, and the start timestamp is then derived from it. In an ascending
   * query, key already is the first qualified timestamp; the descending case needs the aligned
   * window computed below.
   */
  SWindowResInfo *pWinInfo = &pInfo->windowResInfo;
  STimeWindow     aligned = TSWINDOW_INITIALIZER;

  TSKEY lower = MIN(range.skey, range.ekey);
  TSKEY upper = MAX(range.skey, range.ekey);
  getAlignQueryTimeWindow(pQuery, range.skey, lower, upper, &aligned);

  // the start key may legitimately be 0 (1970 timestamp)
  pWinInfo->startTime = pInfo->win.skey;

  if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      assert(range.ekey == pQuery->window.ekey);
    }

    pWinInfo->prevSKey = aligned.skey;
  }

  pInfo->queryRangeSet = 1;
  pInfo->lastKey = pInfo->win.skey;
}

/* Returns true when at least one output expression uses a function flagged TSDB_FUNCSTATE_NEED_TS. */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) {
      return true;
    }

    ++col;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded:
 * 1. the current table's lastKey or the query end key lies inside the block window, so the
 *    precise position must be located with the timestamps;
 * 2. some output function requires the timestamp column in any case (see requireTimestamp).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;
  TSKEY        lastKey = pQuery->current->lastKey;

  if (lastKey >= pBlockWin->skey && lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3968
/*
 * Copy rows of the closed window results into the query output buffers (pQuery->sdata), walking
 * the result list forward for TSDB_ORDER_ASC and backward otherwise. Progress is remembered in
 * pQInfo->groupIndex (next window) and pQInfo->groupResInfo.rowId (rows of the current window
 * already copied), so a window that does not fit can be resumed later.
 *
 * Returns the number of rows copied, at most pQuery->rec.capacity.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t      numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SResultRow **pRows = pResultInfo->pResult;

  bool    asc = (orderType == TSDB_ORDER_ASC);
  int32_t direction = asc ? 1 : -1;
  int32_t first = asc ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  int32_t        copied = 0;

  for (int32_t i = first; (i >= 0) && (i < numOfClosed); i += direction) {
    SResultRow *pRow = pRows[i];

    // empty window: just advance the cursor
    if (pRow->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->rowId = 0;
      continue;
    }

    int32_t rowOffset = pGroupResInfo->rowId;
    int32_t toCopy = pRow->numOfRows - rowOffset;

    if (toCopy <= pQuery->rec.capacity - copied) {
      // the remainder of this window fits: move on to the next window afterwards
      pGroupResInfo->rowId = 0;
      pQInfo->groupIndex += 1;
    } else {
      // not enough output space left: copy what fits and remember where to resume
      toCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->rowId += toCopy;
    }

    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pRow->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * bytes;
      char *src = getPosInResultPage(pRuntimeEnv, col, pRow, page);
      memcpy(dst, src + rowOffset * bytes, bytes * toCopy);
    }

    copied += toCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData supports copying data in ascending/descending order.
 * For interval queries of both super tables and tables, the data is copied in ascending order, since the output
 * results are already ordered in the window result list. When handling group-by queries for both tables and
 * super tables, all group results are already complete.
 *
 * @param pQInfo
 * @param pResultInfo
 */
4045
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // Without a group-by expression the results are copied in ascending order.
  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  int32_t copied = doCopyToSData(pQInfo, pResultInfo, orderType);
  pQuery->rec.rows += copied;

  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

H
Haojun Liao 已提交
4056
/*
 * For non-interval queries, raise numOfRows of every window result to the largest numOfRes among
 * its output cells. Cells of TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ are not considered.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWinInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWinInfo->size; ++w) {
    SResultRow *pRow = pWinInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;
      bool    tsOrTag = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

      if (!tsOrTag) {
        SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pRow, col);
        pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pCell->numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4079
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4080
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4081
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4082
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4083

4084
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4085
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4086

H
Haojun Liao 已提交
4087
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4088
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4089
  } else {
4090
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4091 4092 4093
  }
}

H
Haojun Liao 已提交
4094
/*
 * Tell whether a table query still has results that were not returned to the client.
 * Returns false as soon as the limit has been reached.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // Completed group-by/interval query with window results still waiting to be returned.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // Rows not yet returned to the client must be filled first.
  if (taosNumOfRemainRows(pFillInfo) > 0) {
    return true;
  }

  /*
   * No buffered rows remain. If the query is not completed yet, the gap between two result blocks
   * is handled after the next data block is retrieved from TSDB, so nothing is pending now.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be
   * filled. If the result set is the FIRST result block, the gap between the start time of the
   * query time window and the timestamp of the first result row fills nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response buffer:
 *   1. numOfRows values of every output column, column after column;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a network-order int32;
 *   3. one STableIdInfo per entry with uid/tid/key converted to big-endian.
 * Afterwards the query is marked QUERY_OVER when it is completed and nothing remains to return.
 *
 * The write cursor has no alignment guarantee once the variable-width column data has been
 * written, so the count and the table records are staged in local variables and copied with
 * memcpy instead of being stored through casted pointers.
 *
 * @param pQInfo    query whose results are serialized
 * @param numOfRows number of rows to copy from each output column
 * @param data      destination buffer; the caller must provide enough room
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(int32_t));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    // start from a copy of the source entry, then convert the three wire fields
    STableIdInfo dst = *pSrc;
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4171
/*
 * Generate filled result blocks until rows survive the remaining limit.offset or no results are
 * left. Rows consumed by the offset are discarded; when only a prefix of a block is discarded,
 * the surviving rows are moved to the beginning of the pDst buffers.
 *
 * Returns the number of rows available in the output buffers (0 when everything was discarded and
 * nothing remains). numOfFilled is not used by this function.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo*    pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery*    pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t generated = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset left to consume: hand the whole block back
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, generated);
      return generated;
    }

    // the offset ends inside this block: drop the leading rows and keep the rest
    if (pQuery->limit.offset < generated) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, generated, pQuery->limit.offset, generated - pQuery->limit.offset, 0);

      generated -= (int32_t)pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                generated * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return generated;
    }

    // the whole block is swallowed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, generated, pQuery->limit.offset, 0,
        pQuery->limit.offset - generated);

    pQuery->limit.offset -= generated;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return 0;
    }
  }
}

4216
/*
 * Finalize the cost summary of a query (hash table memory, merge time, result row pool usage)
 * and write it to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo *  pCost = &pEnv->summary;

  // memory held by the result-row hash table plus the table query info map
  uint64_t memOfHash = 0;
  memOfHash += taosHashGetMemSize(pEnv->pResultRowHashTable);
  memOfHash += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pCost->hashSize = memOfHash;

  // add the merge time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  SResultRowPool* pPool = pEnv->pool;
  pCost->winInfoSize = getResultRowPoolMemSize(pPool);
  pCost->numOfTimeWindows = getNumOfAllocatedResultRows(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pCost->winInfoSize/1024.0,
      pCost->numOfTimeWindows, pCost->tableInfoSize/1024.0, pCost->hashSize/1024.0);
}

4240 4241
/*
 * Consume the remaining limit.offset inside the given block. If the offset covers the block
 * exactly, the block is skipped as a whole; otherwise the scan position is moved to the first row
 * after the offset and the functions are applied to the rest of the block.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableInfo = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset swallows the whole block: continue right behind it
  if (pQuery->limit.offset == pBlockInfo->rows) {
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey) + step;
    pQuery->limit.offset = 0;
    return;
  }

  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // column 0 is read as the timestamp column; resume from the row at the new position
  TSKEY *tsList = (TSKEY *) pTsCol->pData;
  pTableInfo->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4275

4276 4277 4278 4279 4280
/*
 * Skip whole data blocks that are covered by limit.offset. Nothing is skipped when there is no
 * offset or when value filters are present. The block in which the offset ends is handed to
 * updateOffsetVal(). Jumps out through pRuntimeEnv->env when the query is killed or terrno is set.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo* pTableInfo = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the offset ends in this block: find the appropriate start position and stop skipping
    if (!(pQuery->limit.offset > blockInfo.rows)) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block lies before the offset
    pQuery->limit.offset -= blockInfo.rows;
    pTableInfo->lastKey = ((QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey) + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4314

H
Haojun Liao 已提交
4315
/*
 * Skip limit.offset time windows of an interval query before the actual scan starts.
 * *start receives the timestamp the query continues from. The function always returns true;
 * errors reported through terrno are raised with longjmp on pRuntimeEnv->env.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  bool noSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL) ||
                (pRuntimeEnv->pFillInfo != NULL);
  if (noSkip) {
    return true;
  }

  /*
   * For an interval query without interpolation, one time window is skipped at a time, for
   * pQuery->limit.offset windows in total. Since holes exist in the data,
   * pQuery->interval.interval * pQuery->limit.offset cannot be used to jump ahead directly.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo * pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableInfo = pQuery->current;
  STimeWindow      aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);
    STimeWindow *pBlockWin = &blockInfo.window;

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, pBlockWin->ekey, pQuery->window.ekey, pBlockWin->ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, pBlockWin->skey, pBlockWin->skey, pQuery->window.ekey, &aligned);
      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      bool asc = QUERY_IS_ASC_QUERY(pQuery);

      // the current window is closed within this block: it counts as one skipped window
      bool winClosedInBlock = asc ? (win.ekey <= pBlockWin->ekey) : (win.ekey >= pBlockWin->skey);
      if (winClosedInBlock) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      GET_NEXT_TIMEWINDOW(pQuery, &tw);

      bool offsetDone = (pQuery->limit.offset == 0);
      bool nextInBlock = asc ? (tw.skey <= pBlockWin->ekey) : (tw.ekey >= pBlockWin->skey);

      if (!nextInBlock) {
        if (offsetDone) {
          // the first wanted window starts outside this block: only record its start
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pWinInfo->prevSKey = tw.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still starts from the current data block: load the block and find
       * the start position of the next queried time window with the primary timestamp column.
       * TODO: Optimize for this case. Not all data blocks need to be loaded, only the one in which
       * the first actually required time window resides.
       */
      SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

      tw = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (offsetDone) {
        // reset the query start timestamp
        pTableInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
        pQuery->window.skey = pTableInfo->win.skey;
        *start = pTableInfo->win.skey;

        pWinInfo->prevSKey = tw.skey;
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      pTableInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
      pWinInfo->prevSKey = tw.skey;
      win = tw;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4441 4442
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4443
/*
 * Create the TSDB query handle for the query and store it in pRuntimeEnv->pQueryHandle.
 * No handle is created for tag-only queries, nor for super table queries that are neither
 * interval nor fixed-output queries. Returns terrno as left by the TSDB call.
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order   = pQuery->order.order,
    .colList = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // A plain ascending scan over a single table uses the time window of that table.
  bool useTableWindow = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv));

  if (useTableWindow) {
    SArray* pGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pOnlyTable = taosArrayGetP(pGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

    // update the query time window
    pQuery->window = cond.twindow;

    if (pQInfo->tableGroupInfo.numOfTables == 0) {
      pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    } else {
      // propagate the updated window to every table of every group
      size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
      for(int32_t g = 0; g < numOfGroups; ++g) {
        SArray *pTables = GET_TABLEGROUP(pQInfo, g);

        size_t numOfTables = taosArrayGetSize(pTables);
        for (int32_t t = 0; t < numOfTables; ++t) {
          STableQueryInfo *pTableInfo = taosArrayGetP(pTables, t);

          pTableInfo->win = pQuery->window;
          pTableInfo->lastKey = pTableInfo->win.skey;
        }
      }
    }
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  }

  return terrno;
}

4507 4508 4509
/*
 * Build the fill column descriptors, one per output expression of the query.
 * Returns a calloc'ed array of pQuery->numOfOutput entries, or NULL when the allocation fails.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  // running byte offset of each column: the sum of the widths of the preceding columns
  int32_t offset = 0;

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo*    pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo* pCol = &pFillCol[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = offset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    offset += pExpr->bytes;
  }

  return pFillCol;
}

4532
/*
 * Initialize the runtime environment of a query: create the TSDB query handle, set up the
 * function contexts, create the disk-based result buffer and window result info where the query
 * kind needs them, and create the fill info for fill queries.
 *
 * @param pQInfo        query to initialize
 * @param pTsBuf        timestamp buffer, may be NULL
 * @param tsdb          TSDB handle, stored in pQInfo->tsdb
 * @param vgId          vnode group id, stored in pQInfo->vgId
 * @param isSTableQuery true for a super table query
 * @return TSDB_CODE_SUCCESS, or the error code of the step that failed
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, 1024, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // taosCreateFillColInfo() returns NULL when its allocation fails; do not hand that on
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    if (pColInfo == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    // align the fill start with the time window containing the query start key
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4638
/*
 * Clear the `complete` flag of the result cell attached to every output
 * column context, so the functions may be executed again for the next table.
 * Contexts without a result cell are left untouched.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pCell == NULL) {
      continue;
    }

    pCell->complete = false;
  }
}

H
Haojun Liao 已提交
4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665
/*
 * Prepare the per-table execution environment before a data block is processed.
 *
 * - interval query: the interval query range is set from the block start key,
 *   and additional info is set when tag results or a ts buffer are in use.
 * - any other query: the execution context is set for the table's group, using
 *   the key one step (in scan direction) past the block end key.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4666
/*
 * Iterate over all data blocks delivered by the active query handle (the
 * primary handle during the master scan, the secondary handle otherwise) and
 * apply the query functions to each block of each table.
 *
 * The function leaves via longjmp() on pRuntimeEnv->env when the query has
 * been killed or when terrno is set after the loop.
 *
 * @param pQInfo  query info
 * @return elapsed time in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // locate the per-table query info by the table id of the current block
    STableQueryInfo **ppInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppInfo == NULL) {
      break;
    }

    STableQueryInfo *pInfo = *ppInfo;
    pQuery->current = pInfo;

    // the table window must lie inside the query window and agree with the scan direction
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert((pInfo->win.skey <= pInfo->win.ekey) &&
             (pInfo->lastKey >= pInfo->win.skey) &&
             (pInfo->win.skey >= pQuery->window.skey && pInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert((pInfo->win.skey >= pInfo->win.ekey) &&
             (pInfo->lastKey <= pInfo->win.skey) &&
             (pInfo->win.skey <= pQuery->window.skey && pInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pInfo, &blockInfo);
    }

    uint32_t     status = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                         &pStatis, &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is discarded: move lastKey one step past the block edge in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startMs;
}

4741 4742
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4743
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4744

4745
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
4746
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
4747
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
4748

H
Haojun Liao 已提交
4749 4750 4751
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
4752

H
Haojun Liao 已提交
4753
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
4754
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
4755
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
4756

4757
  STsdbQueryCond cond = {
4758
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
4759 4760
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
4761
      .numOfCols = pQuery->numOfCols,
4762
  };
4763

H
hjxilinx 已提交
4764
  // todo refactor
4765
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
4766 4767 4768 4769
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
4770

4771
  taosArrayPush(g1, &tx);
4772
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
4773

4774
  // include only current table
4775 4776 4777 4778
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
4779

H
Haojun Liao 已提交
4780
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
4781 4782
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
4783 4784 4785
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
4786

4787
  if (pRuntimeEnv->pTSBuf != NULL) {
H
Haojun Liao 已提交
4788 4789
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

4790
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
4791
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4792
      // failed to find data with the specified tag value and vnodeId
4793
      if (!tsBufIsValidElem(&elem)) {
H
Haojun Liao 已提交
4794 4795 4796 4797 4798 4799
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

4800
        return false;
H
Haojun Liao 已提交
4801 4802
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4803 4804 4805 4806 4807 4808 4809 4810

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
4811 4812
      }
    } else {
H
Haojun Liao 已提交
4813
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4814
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
4815

H
Haojun Liao 已提交
4816
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4817
        // failed to find data with the specified tag value and vnodeId
4818
        if (!tsBufIsValidElem(&elem1)) {
H
Haojun Liao 已提交
4819 4820 4821 4822 4823
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
4824

H
Haojun Liao 已提交
4825
          return false;
H
Haojun Liao 已提交
4826 4827
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4828 4829 4830 4831 4832
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
4833
        }
H
Haojun Liao 已提交
4834

H
Haojun Liao 已提交
4835 4836
      } else {
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
H
Haojun Liao 已提交
4837
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4838 4839 4840 4841 4842
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
4843
      }
4844 4845
    }
  }
4846

4847
  initCtxOutputBuf(pRuntimeEnv);
4848 4849 4850 4851 4852 4853 4854 4855 4856 4857
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4858
static void sequentialTableProcess(SQInfo *pQInfo) {
4859
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4860
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4861
  setQueryStatus(pQuery, QUERY_COMPLETED);
4862

H
Haojun Liao 已提交
4863
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4864

4865
  if (isPointInterpoQuery(pQuery)) {
4866 4867
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4868

4869
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4870
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4871

S
TD-1057  
Shengliang Guan 已提交
4872
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4873
             numOfGroups, group);
H
Haojun Liao 已提交
4874 4875 4876 4877 4878 4879 4880

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4881 4882
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4883 4884 4885
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4886

H
Haojun Liao 已提交
4887 4888 4889 4890 4891 4892 4893
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4894

H
Haojun Liao 已提交
4895
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
4896 4897 4898 4899 4900 4901

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4902

H
Haojun Liao 已提交
4903
      initCtxOutputBuf(pRuntimeEnv);
4904

4905
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4906
      assert(taosArrayGetSize(s) >= 1);
4907

4908
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
4909
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4910

H
Haojun Liao 已提交
4911
      // here we simply set the first table as current table
4912 4913 4914
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4915
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4916

H
Haojun Liao 已提交
4917 4918 4919 4920 4921
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4922

H
Haojun Liao 已提交
4923 4924 4925 4926 4927
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4928 4929 4930 4931 4932 4933

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4934
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4935
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4936
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4937

S
TD-1057  
Shengliang Guan 已提交
4938
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4939 4940 4941 4942 4943 4944 4945

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4946 4947
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4960
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
4961
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
4962

B
Bomin Zhang 已提交
4963 4964
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4965 4966 4967
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4968

4969
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4970 4971
      assert(taosArrayGetSize(s) >= 1);

4972
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4973 4974 4975 4976 4977 4978 4979 4980

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
4981
      taosArrayDestroy(s);
4982 4983 4984 4985 4986
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
4987
        pWindowResInfo->pResult[i]->closed = true; // enable return all results for group by normal columns
4988

H
Haojun Liao 已提交
4989
        SResultRow *pResult = pWindowResInfo->pResult[i];
4990
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
4991 4992
          SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pResult, j);
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
4993 4994 4995
        }
      }

4996
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4997 4998 4999 5000 5001 5002 5003
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5004
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5005 5006 5007 5008 5009 5010

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5011 5012 5013
    }
  } else {
    /*
5014
     * 1. super table projection query, 2. ts-comp query
5015 5016 5017
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5018
    if (pQInfo->groupIndex > 0) {
5019
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5020
      pQuery->rec.total += pQuery->rec.rows;
5021

5022
      if (pQuery->rec.rows > 0) {
5023 5024 5025
        return;
      }
    }
5026

5027
    // all data have returned already
5028
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5029 5030
      return;
    }
5031

5032 5033
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5034

H
Haojun Liao 已提交
5035
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5036 5037
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5038

5039
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5040
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5041
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5042
      }
5043

5044
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5045
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5046
        pQInfo->tableIndex++;
5047 5048
        continue;
      }
5049

H
hjxilinx 已提交
5050
      // TODO handle the limit offset problem
5051
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5052 5053
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5054 5055 5056
          continue;
        }
      }
5057

5058
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5059
      skipResults(pRuntimeEnv);
5060

5061
      // the limitation of output result is reached, set the query completed
5062
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5063
        SET_STABLE_QUERY_OVER(pQInfo);
5064 5065
        break;
      }
5066

5067 5068
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5069

5070
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5071 5072 5073 5074 5075 5076
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5077
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5078

H
Haojun Liao 已提交
5079
        STableIdInfo tidInfo = {0};
5080

H
Haojun Liao 已提交
5081 5082 5083
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5084
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5085 5086
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5087
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5088
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5089 5090
          break;
        }
5091

H
Haojun Liao 已提交
5092 5093 5094 5095
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5096
      } else {
5097
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5098 5099
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5100 5101
          continue;
        } else {
5102 5103 5104
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5105 5106 5107
        }
      }
    }
H
Haojun Liao 已提交
5108

5109
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5110 5111
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5112
  }
5113

5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5126
    finalizeQueryResult(pRuntimeEnv);
5127
  }
5128

5129 5130 5131
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5132

5133
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5134 5135
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5136
      pQuery->limit.offset);
5137 5138
}

5139 5140 5141 5142
/*
 * Switch the runtime environment from the master scan to the reverse scan:
 * the scan flag is set to reverse, the query window keys are swapped, the
 * query order (and the order of the ts buffer cursor, if any) is switched,
 * and a secondary query handle over all table groups is opened.
 *
 * Leaves via longjmp() on pRuntimeEnv->env when the secondary handle cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // the query condition is built from the already switched order and swapped window
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer so that no stale handle is visible
  // while the reverse-scan environment is being prepared below
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5176 5177 5178 5179
/*
 * Switch the runtime environment back to the master scan after the reverse
 * scan: the query window keys are swapped back, the order of the ts buffer
 * cursor (if any) and of the function contexts is switched again, and the
 * scan flag is set to master.
 *
 * NOTE(review): doSaveContext also switches pQuery->order.order, which is not
 * switched back here -- confirm that this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  STSBuf *pTSBuf = pEnv->pTSBuf;
  if (pTSBuf != NULL) {
    SWITCH_ORDER(pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5190 5191 5192
/*
 * Close all time windows once a scan is finished.
 *
 * For an interval query the window result info of every table in every table
 * group is closed; otherwise only the window result info of the runtime
 * environment (the group result) is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = GET_TABLEGROUP(pQInfo, g);

    size_t tableCount = taosArrayGetSize(tables);
    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(tables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Query handler for multiple tables that are scanned together.
 *
 * When groupIndex > 0 the scan was already done by a previous invocation and
 * only the remaining results are copied to the output buffer. Otherwise the
 * master scan is executed, followed by a reverse scan if the query needs one,
 * and the results are merged/copied to the output buffer.
 *
 * Leaves via longjmp() on pRuntimeEnv->env with TSDB_CODE_TSC_QUERY_CANCELLED
 * when the query is killed or pQInfo->code reports an error.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * if the groupIndex > 0, the query process must have been completed already,
     * we only need to copy the data into the output buffer
     */
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    //TODO finalizeQueryResult may cause SEGSEV, since the memory may not allocated yet, add a cleanup function instead
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  bool needMerge = QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery);
  if (!needMerge) {
    // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5288
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5289
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5290

H
hjxilinx 已提交
5291
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5292
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5293 5294
    return;
  }
5295

H
hjxilinx 已提交
5296
  pQuery->current = pTableInfo;  // set current query table info
5297

5298
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5299
  finalizeQueryResult(pRuntimeEnv);
5300

H
Haojun Liao 已提交
5301
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5302
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5303
  }
5304

H
Haojun Liao 已提交
5305
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5306
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5307

5308
  skipResults(pRuntimeEnv);
5309
  limitResults(pRuntimeEnv);
5310 5311
}

H
hjxilinx 已提交
5312
/*
 * Handler for single-table queries whose result may be returned in several
 * rounds. The table is scanned repeatedly until either some rows are produced
 * or the query is completed; afterwards the limit is applied and, when the
 * query is completed, the last key of the table is recorded in
 * pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query info
 * @param pTableInfo  query info of the table to scan, it becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    // the offset is still not consumed although no filter exists: the query is treated as completed
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->rec.rows == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->rec.rows > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // zero-initialized so that no indeterminate bytes are copied into the array
    STableIdInfo tidInfo = {0};
    STableId* id = TSDB_TABLEID(pQuery->current->pTable);

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5372
/*
 * Scan the current table starting at `start` and finalize the results, repeating
 * until the query status reports QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * While a LIMIT offset is still pending, the query has filter columns or a ts
 * buffer, and no fill is requested, closed time windows are discarded after each
 * pass and counted against the offset.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // todo handle offset, in case of top/bottom interval query
    bool rowLevelCheck = (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    bool dropWindows   = rowLevelCheck && (pQuery->limit.offset > 0) && (pQuery->fillType == TSDB_FILL_NONE);

    if (dropWindows) {
      // never drop more windows than are closed, nor more than the remaining offset
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numToDrop   = (int32_t)(MIN(numOfClosed, pQuery->limit.offset));

      clearFirstNTimeWindow(pRuntimeEnv, numToDrop);
      pQuery->limit.offset -= numToDrop;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5399
// handle time interval query on table
H
hjxilinx 已提交
5400
/*
 * Run an interval (or group-by-normal-column) query on a single table.
 *
 * pQInfo     query handle; its runtimeEnv and groupIndex are used and updated
 * pTableInfo table to query; stored as pQuery->current
 *
 * If a LIMIT offset remains after skipTimeInterval() and there are neither filter
 * columns nor a fill context, the query is marked QUERY_COMPLETED right away.
 */
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->current = pTableInfo;

  // skip blocks without load the actual data block from file if no filter condition present
  TSKEY startKey = TSKEY_INITIAL_VAL;
  skipTimeInterval(pRuntimeEnv, &startKey);

  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    tableIntervalProcessImpl(pRuntimeEnv, startKey);

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      pQInfo->groupIndex = 0;  // always start from 0
      pQuery->rec.rows = 0;

      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
    }

    // the offset is handled at prepare stage if no interpolation involved
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
      break;
    }

    // feed the rows produced so far into the fill context, then generate filled output
    taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
    taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);

    int32_t numOfFilled = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    // no result generated yet, continue retrieve data
    pQuery->rec.rows = 0;
  }

  // every exit from the loop above applies the limit exactly once
  limitResults(pRuntimeEnv);

  // all data scanned, the group by normal column can return
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
    pQInfo->groupIndex = 0;
    pQuery->rec.rows = 0;

    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
  }
}

5456 5457 5458 5459
/*
 * Execute one round of a single-table query.
 *
 * When results are left over from a previous round, they are returned first:
 * for fill queries by generating more filled rows, otherwise by copying pending
 * window results. Only if nothing is returned that way is a processor chosen by
 * query kind (interval / fixed output / multi output) and the elapsed time added
 * to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  bool hasRemain = queryHasRemainResForTableQuery(pRuntimeEnv);

  if (hasRemain && pQuery->fillType != TSDB_FILL_NONE) {
    /*
     * There are remain results that are not returned due to result interpolation
     * So, we do keep in this procedure instead of launching retrieve procedure for next results.
     */
    int32_t numOfFilled = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

    if (pQuery->rec.rows > 0) {
      limitResults(pRuntimeEnv);
    }

    qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  if (hasRemain) {
    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pRuntimeEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // there are not data remains
        if (pRuntimeEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTable = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5521
/*
 * Execute one round of a super-table query.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation
 * nor group-by-normal-column, go through multiTableQueryProcess(); everything else
 * goes through sequentialTableProcess(). The elapsed time is added to the runtime
 * summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) && (!pRuntimeEnv->groupbyNormalCol));

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5542
/*
 * Find the position of the column referenced by an expression.
 *
 * pQueryMsg query message holding colList/numOfCols/numOfTags
 * pExprMsg  expression whose colInfo (flag, colId) is looked up
 * pTagCols  tag column array searched when the column is flagged as a tag
 *
 * Returns the index into pTagCols (tag columns) or pQueryMsg->colList (other
 * columns), TSDB_TBNAME_COLUMN_INDEX for the table-name tag, or
 * TSDB_UD_COLUMN_INDEX for a user-defined column. If no column matches, the
 * function asserts and returns -1.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  SColIndex *pCol = &pExprMsg->colInfo;

  if (TSDB_COL_IS_TAG(pCol->flag)) {
    if (pCol->colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t t = 0; t < pQueryMsg->numOfTags; ++t) {
      if (pCol->colId == pTagCols[t].colId) {
        return t;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pCol->flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t c = 0; c < pQueryMsg->numOfCols; ++c) {
      if (pCol->colId == pQueryMsg->colList[c].colId) {
        return c;
      }
    }
  }

  assert(0);
  return -1;
}

5573 5574 5575
/*
 * Check that the column referenced by pExprMsg resolves to an index below the
 * number of columns or the number of tags in the query message.
 * Note that negative indices returned by getColumnIndexInSource() also satisfy
 * this test.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5578
/*
 * Sanity-check the scalar header fields of a query message.
 *
 * Rejects a negative interval, a non-positive table count, a negative group-by
 * column count, and an output column count outside 1..TSDB_MAX_COLUMNS. The first
 * failing check is logged; later checks are not evaluated.
 *
 * Returns true when all checks pass.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the source column/tag counts of a query message.
 *
 * Negative counts, or a total above TSDB_MAX_COLUMNS, are rejected and logged.
 * When the message carries no columns and no tags at all, every output expression
 * must be TSDB_FUNC_TAGPRJ, or TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT applied to the
 * table-name column; otherwise the message is rejected (without logging).
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  for (int32_t k = 0; k < pQueryMsg->numOfOutput; ++k) {
    SSqlFuncMsg* pFunc = pExprMsg[k];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed  = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                    (onTbname && (pFunc->functionId == TSDB_FUNC_TID_TAG || pFunc->functionId == TSDB_FUNC_COUNT));

    if (!allowed) {
      return false;
    }
  }

  return true;
}

5624
/*
 * Decode the table id list embedded in a query message.
 *
 * pQueryMsg    message header; numOfTables must be > 0
 * pMsg         points at the first STableIdInfo entry in the message
 * pTableIdList receives a new array holding a copy of each entry
 *
 * Each entry is byte-swapped in place (tid 32-bit, uid/key 64-bit) before it is
 * pushed. Returns the message position right after the last entry.
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntries = (STableIdInfo *)pMsg;
  for (int32_t k = 0; k < pQueryMsg->numOfTables; ++k) {
    STableIdInfo *pEntry = &pEntries[k];

    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return pMsg + sizeof(STableIdInfo) * pQueryMsg->numOfTables;
}
5642

5643
/**
H
hjxilinx 已提交
5644
 * pQueryMsg->head has been converted before this function is called.
5645
 *
H
hjxilinx 已提交
5646
 * @param pQueryMsg
5647 5648 5649 5650
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5651
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5652
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5653 5654
  int32_t code = TSDB_CODE_SUCCESS;

5655 5656 5657 5658
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5659 5660 5661 5662 5663 5664
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5665 5666
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5667

5668 5669
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5670
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5671
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5672 5673

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5674
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5675
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5676 5677 5678
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5679
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5680
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5681
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5682

5683
  // query msg safety check
5684
  if (!validateQueryMsg(pQueryMsg)) {
5685 5686
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5687 5688
  }

H
hjxilinx 已提交
5689 5690
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5691 5692
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5693
    pColInfo->colId = htons(pColInfo->colId);
5694
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5695 5696
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5697

H
hjxilinx 已提交
5698
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5699

H
hjxilinx 已提交
5700
    int32_t numOfFilters = pColInfo->numOfFilters;
5701
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5702
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5703 5704 5705 5706
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5707 5708 5709
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5710
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5711

5712 5713
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5714 5715 5716

      pMsg += sizeof(SColumnFilterInfo);

5717 5718
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5719

5720
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5721 5722 5723 5724 5725
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5726
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5727
        pMsg += (pColFilter->len + 1);
5728
      } else {
5729 5730
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5731 5732
      }

5733 5734
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5735 5736 5737
    }
  }

5738
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5739 5740 5741 5742 5743
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5744
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5745

5746
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5747
    (*pExpr)[i] = pExprMsg;
5748

5749
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5750 5751 5752 5753
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5754

5755
    pMsg += sizeof(SSqlFuncMsg);
5756 5757

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5758
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5759 5760 5761 5762
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5763
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5764 5765 5766 5767 5768
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5769 5770
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5771
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5772 5773
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5774 5775
      }
    } else {
5776
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5777
//        return TSDB_CODE_QRY_INVALID_MSG;
5778
//      }
5779 5780
    }

5781
    pExprMsg = (SSqlFuncMsg *)pMsg;
5782
  }
5783

5784
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5785
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5786
    goto _cleanup;
5787
  }
5788

H
hjxilinx 已提交
5789
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5790

H
hjxilinx 已提交
5791
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5792
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5793 5794 5795 5796
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5797 5798 5799

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5800
      pMsg += sizeof((*groupbyCols)[i].colId);
5801 5802

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5803 5804
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5805
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5806 5807 5808 5809 5810
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5811

H
hjxilinx 已提交
5812 5813
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5814 5815
  }

5816 5817
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5818
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5819 5820

    int64_t *v = (int64_t *)pMsg;
5821
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5822 5823
      v[i] = htobe64(v[i]);
    }
5824

5825
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5826
  }
5827

5828 5829
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5830 5831 5832 5833 5834
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5835 5836
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5837

5838 5839 5840 5841
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5842

5843
      (*tagCols)[i] = *pTagCol;
5844
      pMsg += sizeof(SColumnInfo);
5845
    }
H
hjxilinx 已提交
5846
  }
5847

5848 5849 5850
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5851 5852 5853 5854 5855 5856

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5857 5858 5859
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5860

weixin_48148422's avatar
weixin_48148422 已提交
5861
  if (*pMsg != 0) {
5862
    size_t len = strlen(pMsg) + 1;
5863

5864
    *tbnameCond = malloc(len);
5865 5866 5867 5868 5869
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5870
    strcpy(*tbnameCond, pMsg);
5871
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5872
  }
5873

5874
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5875 5876
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5877
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5878
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5879 5880

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5881 5882

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
5883
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5884 5885
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
5886 5887 5888 5889
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
5890 5891

  return code;
5892 5893
}

H
hjxilinx 已提交
5894
/*
 * Build the expression tree of an arithmetic output column.
 *
 * pArithExprInfo expression whose first argument holds the serialized tree
 *                (argValue.pz / argBytes); on success its pExpr is set
 * pQueryMsg      used for log context only
 *
 * Returns TSDB_CODE_SUCCESS, the code caught from exprTreeFromBinary(), or
 * TSDB_CODE_QRY_APP_ERROR when no tree was produced.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  char *exprStr = pArithExprInfo->base.arg[0].argValue.pz;
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, exprStr);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(exprStr, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, exprStr, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, exprStr);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5915
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5916 5917
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5918
  int32_t code = TSDB_CODE_SUCCESS;
5919

H
Haojun Liao 已提交
5920
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5921
  if (pExprs == NULL) {
5922
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5923 5924 5925 5926 5927
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5928
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5929
    pExprs[i].base = *pExprMsg[i];
5930
    pExprs[i].bytes = 0;
5931 5932 5933 5934

    int16_t type = 0;
    int16_t bytes = 0;

5935
    // parse the arithmetic expression
5936
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5937
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5938

5939
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
5940
        tfree(pExprs);
5941
        return code;
5942 5943
      }

5944
      type  = TSDB_DATA_TYPE_DOUBLE;
5945
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5946
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5947
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5948
      type = s.type;
H
Haojun Liao 已提交
5949
      bytes = s.bytes;
5950 5951
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5952 5953
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5954 5955
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5956 5957 5958 5959 5960

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5961
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5962
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5963

dengyihao's avatar
dengyihao 已提交
5964
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5965 5966 5967 5968
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5969
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5970

H
Haojun Liao 已提交
5971 5972 5973
        type  = s.type;
        bytes = s.bytes;
      }
5974 5975
    }

S
TD-1057  
Shengliang Guan 已提交
5976
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
5977
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5978
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
5979
      tfree(pExprs);
5980
      return TSDB_CODE_QRY_INVALID_MSG;
5981 5982
    }

5983
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5984
      tagLen += pExprs[i].bytes;
5985
    }
5986
    assert(isValidDataType(pExprs[i].type));
5987 5988 5989
  }

  // TODO refactor
5990
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5991 5992
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5993

5994
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5995
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5996 5997 5998 5999 6000 6001 6002 6003 6004
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6005 6006 6007
    }
  }

6008
  *pExprInfo = pExprs;
6009 6010 6011
  return TSDB_CODE_SUCCESS;
}

6012
/*
 * Build the group-by descriptor from a query message.
 *
 * pQueryMsg converted query message (numOfGroupCols, orderType, orderByIdx)
 * pColIndex array of numOfGroupCols group-by columns; each entry is copied
 * code      set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *           left untouched otherwise
 *
 * Returns NULL when the query has no group-by columns or on allocation failure,
 * otherwise a newly allocated descriptor.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not hand out a descriptor without its column array
    tfree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6036
/*
 * Build the per-column filter table of a query from the filters attached to its
 * column list.
 *
 * pQInfo query handle, used for log context only
 * pQuery query whose colList is scanned; numOfFilterCols is incremented for each
 *        column that has filters and pFilterInfo is allocated and populated
 *
 * For every filter a comparison callback is selected: a range function when the
 * lower bound is > or >= and the upper bound is < or <=, otherwise a value
 * function indexed by the single relation that is set.
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG.
 * NOTE(review): on failure the partially built pQuery->pFilterInfo is left in
 * place - presumably released by the caller; confirm.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  // i walks all columns, j walks only the columns that carry filters
  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          assert(rangeFilterArray != NULL);
          // slot by bound inclusiveness: [1] (a,b)  [2] [a,b)  [3] (a,b]  [4] [a,b]
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            // a single-value filter must not carry a second relation
            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

6129
/**
 * Resolve the column index of every output expression.
 *
 * For each select expression (arithmetic expressions are skipped) the colId
 * stored in the expression is looked up and colInfo.colIndex is overwritten:
 *   - normal columns are searched in pQuery->colList,
 *   - user-defined constant columns (colId <= TSDB_UD_COLUMN_INDEX) are left untouched,
 *   - everything else is searched in pQuery->tagColList.
 *
 * @param pQuery query whose pSelectExpr/colList/tagColList are already populated
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before dereferencing it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column referenced by an expression must be part of the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is the only id allowed to be absent from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6166 6167
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6168 6169 6170
/**
 * Decide how many result rows the output buffer holds (rec.capacity) and at
 * which fill level results are handed back (rec.threshold, 85% of capacity).
 *
 * Projection queries get RESULT_MSG_MIN_SIZE / rowSize rows, but never fewer
 * than RESULT_MSG_MIN_ROWS; all other queries use a fixed 4096-row buffer.
 *
 * @param pQuery query whose rowSize has already been calculated
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  if (isProjQuery(pQuery)) {
    int32_t numOfRes = RESULT_MSG_MIN_ROWS;

    // a zero row size (no output columns) must not be used as a divisor
    if (pQuery->rowSize > 0) {
      numOfRes = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
      if (numOfRes < RESULT_MSG_MIN_ROWS) {
        numOfRes = RESULT_MSG_MIN_ROWS;
      }
    }

    pQuery->rec.capacity  = numOfRes;
    pQuery->rec.threshold = (int32_t)(numOfRes * RESULT_THRESHOLD_RATIO);
  } else {  // in case of non-prj query, a smaller output buffer will be used.
    pQuery->rec.capacity = 4096;
    pQuery->rec.threshold = (int32_t)(pQuery->rec.capacity * RESULT_THRESHOLD_RATIO);
  }
}

6187 6188
/**
 * Allocate and populate a SQInfo (and its embedded SQuery) from a query message.
 *
 * Ownership: pGroupbyExpr, pExprs, pTagCols and the contents of *pTableGroupInfo
 * are taken over by the new SQInfo. On failure they are released here (either
 * directly in the cleanup labels or through freeQInfo), so the caller must not
 * free them again in either case.
 *
 * @param pQueryMsg       decoded query message
 * @param pGroupbyExpr    group-by expression, may be NULL
 * @param pExprs          array of pQueryMsg->numOfOutput output expressions
 * @param pTableGroupInfo grouped tables to be queried; copied by value
 * @param pTagCols        tag column list
 * @param stableQuery     forwarded to changeExecuteScanOrder
 * @return the new SQInfo, or NULL when any allocation or filter setup fails
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  // from here on pQuery is reachable from pQInfo, so freeQInfo releases everything attached to it
  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // size the array by its own element type instead of an unrelated struct
  pQuery->colList = calloc(numOfCols, sizeof(*pQuery->colList));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the struct copy above shares the filter array with the message; replace it with a private clone
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);

  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  if (pQInfo->runtimeEnv.keyBuf == NULL) {
    goto _cleanup;
  }

  pQInfo->runtimeEnv.pool = initResultRowPool(getWindowResultSize(&pQInfo->runtimeEnv));

  // one contiguous slab holding a STableQueryInfo per table; calloc(0, ..) may legally
  // return NULL, which is not an out-of-memory condition when there is no table at all
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL && pTableGroupInfo->numOfTables > 0) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  int32_t index = 0;  // position of the next free STableQueryInfo slot inside pQInfo->pBuf

  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    // register the group first so that freeQInfo releases it even if a later table fails
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // every table starts scanning from its own last key
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // pQInfo could not be allocated: nothing has taken over the table group yet
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // pQuery could not be allocated: the expression inputs are not attached to anything yet
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

_cleanup:
  // freeQInfo ignores a NULL/invalid pQInfo and otherwise releases everything attached to it
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6370
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6371 6372 6373 6374
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6375

H
hjxilinx 已提交
6376 6377 6378 6379
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6380
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6381 6382 6383
  return (sig == (uint64_t)pQInfo);
}

6384
/**
 * Second-stage initialization of a freshly created SQInfo.
 *
 * Sets the timestamp precision, short-circuits queries that cannot produce
 * any result (empty time range or no qualified table) and otherwise hands
 * over to doInitQInfo.
 *
 * @param pQueryMsg decoded query message (source of the optional ts-comp blocks)
 * @param tsdb      tsdb handle
 * @param vgId      vnode id, forwarded to the ts buffer and doInitQInfo
 * @param pQInfo    query info to initialize
 * @param isSTable  forwarded to doInitQInfo
 * @return TSDB_CODE_SUCCESS, or the error of doInitQInfo. On error pQInfo has
 *         already been freed here and must not be used by the caller.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is only consumed by doInitQInfo, so it is built after the
  // early returns above; building it earlier left it unreferenced on those paths.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    // NOTE(review): creation failure is not reported here; doInitQInfo then sees a NULL buffer
    if (pTSBuf != NULL) {
      tsBufResetPos(pTSBuf);
      bool ret = tsBufNextPos(pTSBuf);

      UNUSED(ret);
    }
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6429
/**
 * Release an array of column filters together with the string payload of
 * every filter that is flagged as a string filter.
 *
 * @param pFilter      filter array, may be NULL
 * @param numOfFilters number of entries in pFilter; nothing is freed when it is 0
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    for (int32_t idx = 0; idx < numOfFilters; ++idx) {
      SColumnFilterInfo* pItem = &pFilter[idx];

      // only string filters own the buffer referenced by pz
      if (pItem->filterstr) {
        free((void*)(pItem->pz));
      }
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6443 6444
/**
 * Destroy every per-table query info held by a table group, then the group
 * arrays and the lookup map, and reset the group info to an empty state.
 *
 * @param pTableqinfoGroupInfo group info to clear; the struct itself is not freed
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray* pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pTables = taosArrayGetP(pGroups, g);
      size_t  tableCount = taosArrayGetSize(pTables);

      for (int32_t t = 0; t < tableCount; ++t) {
        destroyTableQueryInfoImpl((STableQueryInfo*) taosArrayGetP(pTables, t));
      }

      taosArrayDestroy(pTables);
    }
  }

  // both destructors are invoked unconditionally, exactly as for a populated group
  taosArrayDestroy(pGroups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
hjxilinx 已提交
6467 6468 6469 6470
/**
 * Release a SQInfo and everything reachable from it: the runtime environment,
 * the SQuery with its result buffers, filters, expressions and column lists,
 * the per-table query infos, the table group and finally the SQInfo itself.
 *
 * Handles that fail isValidQInfo (NULL or not self-signed) are ignored.
 *
 * @param pQInfo query info to destroy
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    // per-column result pages, then the page pointer array itself
    if (pQuery->sdata != NULL) {
      for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
        tfree(pQuery->sdata[c]);
      }

      tfree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      tfree(pQuery->fillVal);
    }

    // filter function arrays attached to each filtered column
    for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
      SSingleColumnFilterInfo *pSingle = pQuery->pFilterInfo + k;
      if (pSingle->numOfFilters > 0) {
        tfree(pSingle->pFilters);
      }
    }

    // expression trees of the output columns, then the expression array
    if (pQuery->pSelectExpr != NULL) {
      for (int32_t e = 0; e < pQuery->numOfOutput; ++e) {
        SExprInfo *pOne = pQuery->pSelectExpr + e;
        if (pOne->pExpr != NULL) {
          tExprTreeDestroy(&pOne->pExpr, NULL);
        }
      }

      tfree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    // cloned column filters, then the column list
    if (pQuery->colList != NULL) {
      for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
        SColumnInfo *pCol = &pQuery->colList[c];
        freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
      }

      tfree(pQuery->colList);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // clear the signature so the handle no longer passes isValidQInfo
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
6542
/**
 * Compute the payload size in bytes of the current result set.
 *
 * For a ts-comp query with at least one row the result lives in a file whose
 * path is stored in sdata[0]->data; the file size is returned and also written
 * back to *numOfRows. For every other case the size is rowSize * (*numOfRows).
 *
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo    query info
 * @param numOfRows in: number of result rows; out: file size for ts-comp queries
 * @return number of bytes, or 0 when the ts-comp file cannot be stat'ed
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  // for ts-comp queries the "row count" reported to the caller is the file size
  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6563

H
hjxilinx 已提交
6564 6565 6566
/**
 * Copy the current result set into the response buffer and update the
 * running totals / query status.
 *
 * For ts-comp queries the result is the content of the temporary file named
 * by sdata[0]->data; the file is read into data and then removed. For all
 * other queries the rows are copied by doCopyQueryResultToMsg.
 *
 * @param pQInfo query info
 * @param data   destination buffer inside the response message
 * @return always TSDB_CODE_SUCCESS (I/O failures are only logged)
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the offset signed so that a failing lseek (-1) is detected instead of
      // being turned into a huge read length
      int64_t fileSize = (int64_t)lseek(fd, 0, SEEK_END);

      if (fileSize < 0) {
        qError("QInfo:%p failed to get size of ts-comp tmp file, path:%s, reason:%s", pQInfo,
               pQuery->sdata[0]->data, strerror(errno));
      } else {
        qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, fileSize);

        if (lseek(fd, 0, SEEK_SET) >= 0) {
          // read() reports failure as -1; compare as signed so errors are not missed
          int64_t numOfRead = (int64_t)read(fd, data, (uint32_t)fileSize);
          if (numOfRead < fileSize) {  // todo handle error
            assert(0);
          }
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6616 6617 6618 6619 6620 6621 6622
/* Bookkeeping for the query handles of one owner; its users are outside this part of the file. */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vnode this manager belongs to -- TODO confirm
  bool            closed;         // presumably set once the manager stops accepting work -- TODO confirm
  pthread_mutex_t lock;           // NOTE(review): which fields this mutex guards is not visible here
} SQueryMgmt;

6623
/**
 * Build a query handle from a query message.
 *
 * Decodes the message, creates the output/group-by expressions, resolves the
 * set of tables to query (single table, super table by tag condition, or an
 * explicit id list), reserves query buffer and finally creates and
 * initializes the SQInfo.
 *
 * All intermediate objects are released at _over. Objects handed to
 * createQInfoImpl are owned by it afterwards (also when it fails), which is
 * why the local pointers are reset right after the call.
 *
 * @param tsdb      tsdb handle, must not be NULL
 * @param vgId      vnode id
 * @param pQueryMsg query message, must not be NULL
 * @param pQInfo    out: the created handle; set to NULL whenever an error is returned
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    // never report success without having produced a query handle
    if (code == TSDB_CODE_SUCCESS) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    // the table group has not been handed to a SQInfo yet, so it must be released here
    if (tableGroupInfo.pGroupList != NULL) {
      tsdbDestroyTableGroup(&tableGroupInfo);
    }
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);

  // ownership moved into createQInfoImpl (it also releases them on failure)
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);
  free(pExprs);
  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  // the filters decoded into the message are no longer needed (the SQInfo holds its own clones)
  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // on failure the SQInfo has already been freed by initQInfo, but *pQInfo may still
  // hold the stale pointer, so clear it for the caller
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6754
/**
 * Destroy a query handle: log completion, print the cost summary and free it.
 * Invalid or NULL handles are silently ignored.
 *
 * @param qHandle handle previously produced by qCreateQueryInfo
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

6765 6766 6767 6768 6769 6770 6771 6772
/**
 * Mark the result of the current execution round as ready and give up
 * ownership of the handle.
 *
 * @param pQInfo query info currently owned by the calling thread
 * @return true when a response context is already registered on the handle
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // clear qhandle owner, it must be in the secure area. other thread may run ahead before current, after it is
  // put into task to be executed.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  // posted after the lock is released
  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6784
/**
 * Run one execution round of a query on the calling thread.
 *
 * The handle is claimed by atomically swapping the caller's thread id into
 * pQInfo->owner; if it is already owned the call fails immediately. Every
 * other exit goes through doBuildResCheck, which releases the ownership.
 *
 * @param qinfo query handle
 * @return false when the handle is being executed by another thread,
 *         otherwise the result of doBuildResCheck
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t self = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // a non-zero value means control came back here through the jump buffer:
  // record it as the query's error code and return to the client
  int32_t jmpCode = setjmp(pRuntimeEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6840
/**
 * Find out whether the result of a query can be built right now.
 *
 * With _NON_BLOCKING_RETRIEVE the readiness flag is inspected under the
 * handle lock; when the result is not ready yet, pRspContext is stored on the
 * handle. Without it the call waits on the handle's "ready" semaphore.
 *
 * @param qinfo       query handle
 * @param buildRes    out: true when the result can be built by the caller now
 * @param pRspContext response context to remember when the result is not ready
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the
 *         code currently stored on the handle
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo rejects NULL as well
  if (!isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  int32_t code = TSDB_CODE_SUCCESS;

#if _NON_BLOCKING_RETRIEVE
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);
  assert(pQInfo->rspContext == NULL);

  *buildRes = (pQInfo->dataReady == QUERY_RESULT_READY);
  if (*buildRes) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    // not ready yet: remember the response context on the handle
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  }

  code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);
#else
  // blocking mode: wait until the "ready" semaphore is posted
  tsem_wait(&pQInfo->ready);
  *buildRes = true;
  code = pQInfo->code;
#endif

  return code;
}
6883

6884
/*
 * Allocate and fill the retrieve response for the query behind `qinfo`.
 *
 * qinfo         query handle; NULL or invalid handles yield
 *               TSDB_CODE_QRY_INVALID_QHANDLE.
 * pRsp          out: buffer obtained from rpcMallocCont(); left NULL when the
 *               allocation fails (TSDB_CODE_QRY_OUT_OF_MEMORY is returned).
 * contLen       out: total length requested for the response buffer.
 * continueExec  out: false once the query is killed or over, true otherwise.
 *
 * Returns pQInfo->code after the response has been filled.
 *
 * TODO (carried over from the original): an allocation failure only avoids a
 * crash here; no error code reaches the client.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (!(pQInfo != NULL && isValidQInfo(pQInfo))) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload = result data + one int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *rsp = *pRsp;

  // header fields are converted with htonl/htobe64/htons before being stored
  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    rsp->offset = 0;
  } else {
    rsp->offset = htobe64(pQuery->limit.offset);
  }
  rsp->useconds  = htobe64(pRuntimeEnv->summary.elapsedTime);
  rsp->precision = htons(pQuery->precision);

  // rows are dumped only for a successful query that produced something;
  // in every other case the query is marked as over
  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (!IS_QUERY_KILLED(pQInfo) && !Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  } else {
    // per the original note: the current thread holds the refcount, so
    // releasing the tsdb query handle here is safe
    doFreeQueryHandle(pQInfo);
    *continueExec = false;
    rsp->completed = 1;  // tell the client there is nothing more to fetch
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6939

6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950
/*
 * Report whether the query behind `qinfo` has finished.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise
 * 1 when the query is killed or its status carries QUERY_OVER, else 0.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (!(pQInfo != NULL && isValidQInfo(pQInfo))) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  bool finished = IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
  return finished;
}

H
Haojun Liao 已提交
6951
/*
 * Mark the query behind `qinfo` as killed and block until no thread owns the
 * handle any more.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 * TSDB_CODE_SUCCESS.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (!(pQInfo != NULL && isValidQInfo(pQInfo))) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the executing thread to stop: per the original note, the owner of
  // the qHandle is cleared as soon as the query stops. Polls every 100 ms.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }
    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984
/*
 * Write one tag value into the result buffer.
 *
 * output  destination slot in the result buffer
 * val     source value, or NULL to store the type's null representation
 * type    TSDB data type of the value
 * bytes   width used for fixed-size types (for both the copy and setNull)
 *
 * BINARY/NCHAR values are copied with the length taken from the value itself
 * (varDataTLen); every other type copies exactly `bytes` bytes.
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    memcpy(output, val, varDataTLen(val));
  } else {
    // TODO (from the original author): "here stop will cause client crash"
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
6985 6986 6987
/*
 * Build the result of a tag-only query directly from the table list of the
 * single table group, without scanning any data.
 *
 * The first select expression decides what is produced:
 *   - TSDB_FUNC_TID_TAG: one var-length record per table holding an int16_t 0,
 *     the table uid, tid, the vgId and then either the table name or the
 *     requested tag value;
 *   - TSDB_FUNC_COUNT:   a single row with the number of tables ("count(tbname)");
 *   - anything else:     one row per table with the requested tags / table
 *     name, honouring limit.offset and limit.limit.
 *
 * pQInfo->tableIndex is advanced as tables are consumed, pQuery->rec.rows is
 * set to the number of rows produced and the query status is set to
 * QUERY_COMPLETED. Nothing is done when there is no table group.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // default to the expression's own type/width; prefer the tag column
    // definition when the column id is found in the tag list
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The fields below land at offsets inside a char buffer that are not
      // guaranteed to be aligned for their types, so they are serialized with
      // memcpy instead of being stored through casted pointers. The byte
      // layout is the same as before.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // never produce more rows than the result buffer holds, and fewer when a
    // smaller non-negative LIMIT is set
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7118
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7119 7120 7121 7122
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7123
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7124 7125
}

H
Haojun Liao 已提交
7126
/*
 * Try to reserve query buffer for `numOfTables` tables out of the global
 * tsQueryBufferSize budget.
 *
 *   tsQueryBufferSize <  0 : no limit, always TSDB_CODE_SUCCESS
 *   tsQueryBufferSize == 0 : query processing disabled,
 *                            TSDB_CODE_QRY_NOT_ENOUGH_BUFFER
 *   tsQueryBufferSize >  0 : the estimate is subtracted with a compare-and-swap
 *                            retry loop; fails with
 *                            TSDB_CODE_QRY_NOT_ENOUGH_BUFFER when the budget
 *                            would drop below zero.
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  if (tsQueryBufferSize > 0) {
    for (;;) {
      int64_t available = tsQueryBufferSize;
      int64_t left = available - required;

      if (left < 0) {
        return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
      }

      // retry when the budget changed between the read and the swap
      if (atomic_val_compare_exchange_64(&tsQueryBufferSize, available, left) == available) {
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  // a budget of exactly zero disables query processing
  return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
}

H
Haojun Liao 已提交
7149
/*
 * Give back the buffer estimate for `numOfTables` tables to the global
 * tsQueryBufferSize budget.
 *
 * A negative budget means "unlimited" (see checkForQueryBuf), so there is
 * nothing to restore in that case.
 *
 * A budget of exactly 0 must still be refunded: checkForQueryBuf() accepts a
 * reservation that leaves 0 bytes, and skipping the refund in that state would
 * keep the budget at 0, which checkForQueryBuf() treats as "queries disabled".
 *
 * NOTE(review): this assumes callers release only what a successful
 * checkForQueryBuf() reserved — confirm against the call sites.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize < 0) {
    return;
  }

  int64_t t = getQuerySupportBufSize(numOfTables);

  // restore the amount that was reserved for this query
  atomic_add_fetch_64(&tsQueryBufferSize, t);
}

7160 7161 7162 7163 7164 7165 7166
/*
 * Return the retrieve-request context currently stored in the query handle
 * (pQInfo->rspContext). The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* pContext = pInfo->rspContext;
  return pContext;
}

7167 7168 7169 7170 7171 7172 7173
/*
 * Release callback registered with the qinfo cache in qOpenQueryMgmt().
 * `qhandle` points at a slot holding the query handle; when both the slot and
 * the handle are non-NULL the query is killed and then destroyed.
 */
void freeqinfoFn(void *qhandle) {
  void** slot = qhandle;

  if (slot != NULL && *slot != NULL) {
    qKillQuery(*slot);
    qDestroyQueryInfo(*slot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7178
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7179 7180 7181 7182

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7183
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7184 7185 7186 7187
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7188

S
TD-1530  
Shengliang Guan 已提交
7189
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7190 7191 7192 7193
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7194 7195

  qDebug("vgId:%d, open querymgmt success", vgId);
7196
  return pQueryMgmt;
7197 7198
}

H
Haojun Liao 已提交
7199
/*
 * Callback handed to taosCacheRefresh() by qQueryMgmtNotifyClosed():
 * `handle` points at a slot holding a query handle, which is killed.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** slot = (void**)handle;
  void*  qinfo = *slot;

  qKillQuery(qinfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7205 7206 7207 7208 7209 7210 7211
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7212
//  pthread_mutex_lock(&pQueryMgmt->lock);
7213
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7214
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7215

H
Haojun Liao 已提交
7216
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233
}

/*
 * Destroy a query manager that has already been marked closed: the cache
 * pointer is detached from the manager before the cache is cleaned up, then
 * the mutex is destroyed and the manager freed. A NULL manager is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  assert(pMgmt->closed);

  // remember the id for the final log line; the manager is gone by then
  int32_t vgId = pMgmt->vgId;

  // clear the field first, then tear the cache down through the local copy
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  tfree(pMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}

7239
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7240
  if (pMgmt == NULL) {
7241
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7242 7243 7244
    return NULL;
  }

7245
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7246

7247 7248
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7249
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7250
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7251 7252 7253
    return NULL;
  }

H
Haojun Liao 已提交
7254
//  pthread_mutex_lock(&pQueryMgmt->lock);
7255
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7256
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7257
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7258
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7259 7260
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7261 7262
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7263
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7264 7265 7266 7267 7268

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7269
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7270 7271
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7272 7273 7274 7275 7276 7277 7278
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7279 7280 7281
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7282 7283
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7284
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7285
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7286 7287 7288 7289 7290 7291
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7292
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7293 7294 7295 7296 7297
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7298
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7299 7300 7301
  return 0;
}

7302