qExecutor.c 252.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; if it is not, the
 * program is forced to load the primary column explicitly.
 */
37
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
38 39
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

40
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
41
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
42
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
43
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
44

H
Haojun Liao 已提交
45
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
46

47
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
48
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
49

H
Haojun Liao 已提交
50 51
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey pair of time window _src into _dst.
 * Both arguments are parenthesized so that expressions such as *pWin expand correctly.
 * NOTE(review): the ';' after while (0) is kept for compatibility with call sites that are not
 * visible here; because of it the macro must not be the sole statement of an unbraced if/else.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0);

57
/* Query execution status flags; each value occupies its own bit so they can be OR-ed together. */
enum {
  /* set when the query starts to execute */
  QUERY_NOT_COMPLETED = 1u << 0,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 1u << 1,

  /*
   * The query is over:
   * 1. used by queries that produce a single result row, e.g. count/sum/first/last/avg;
   * 2. also set once all data within the queried time window has been handled.
   */
  QUERY_COMPLETED = 1u << 2,

  /*
   * The results have not been completely returned to the client yet; usually used
   * for interval queries with the interpolation option.
   */
  QUERY_OVER = 1u << 3,
};
77 78

/*
 * TS_JOIN_* result codes.
 * NOTE(review): presumably the outcome of matching timestamps/tags during a join -- confirm
 * against the code that consumes these values.
 */
enum {
  TS_JOIN_TS_EQUAL = 0,
  TS_JOIN_TS_NOT_EQUALS,   /* == 1 */
  TS_JOIN_TAG_NOT_EQUALS,  /* == 2 */
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Fault-injection allocators (compiled out by the surrounding #if 0).
 * Each wrapper draws a random number and returns NULL for part of the calls instead of
 * forwarding to the real allocator; the #defines at the end redirect calloc/malloc/realloc
 * in the rest of this file to the wrappers.
 */

/* fails when rand() is a multiple of 1000, otherwise forwards to malloc */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();
  return (v % 1000 <= 0) ? NULL : malloc(__size);
}

/* fails when rand() is a multiple of 1000, otherwise forwards to calloc */
static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  return (v % 1000 <= 0) ? NULL : calloc(num, __size);
}

/* fails when rand() % 5 is 0 or 1, otherwise forwards to realloc */
static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();
  return (v % 5 <= 1) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Move the time window *tw to the next window in scan direction.
 *
 * For interval units other than 'n' and 'y' the window start is shifted by the sliding value
 * (multiplied by the direction factor) and the inclusive end is start + interval - 1.
 *
 * For 'n'/'y' units the start key is converted to seconds, broken down with localtime_r(),
 * moved by `interval` months (a 'y' interval is multiplied by 12) in scan direction, and converted
 * back with mktime(); the end key is the start of the window one interval later, minus 1.
 *
 * @param pQuery  query descriptor; order, interval and precision are read
 * @param tw      in/out: current window on entry, next window on return
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to seconds: one division for millisecond keys, two for microsecond keys
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  // start of the next window, counted in months
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before scaling: time_t * long overflows where both are 32 bits wide
  tw->skey = (int64_t)mktime(&tm) * 1000L;

  // the window ends one interval after its start
  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000L;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  // ekey is inclusive
  tw->ekey -= 1;
}

173 174
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
175

H
hjxilinx 已提交
176
// todo move to utility
177
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
178

179
static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
180 181
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
static void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow);
182
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
183

184
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
185
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
186

187
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
188
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
189 190
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
191
static void buildTagQueryResult(SQInfo *pQInfo);
192

193
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
194
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
195 196
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
197

198
/*
 * Evaluate all column filters of the query against the element at position elemPos.
 *
 * For every filtered column at least one of its filter elements has to accept the value:
 *  - a NULL value is accepted only by an isNull_filter element;
 *  - a non-NULL value is accepted by a notNull_filter element, is never accepted by an
 *    isNull_filter element, and is otherwise passed to the element's fp callback.
 *
 * @return true if the element qualifies for every filtered column, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = (char*)pColFilter->pData + pColFilter->info.bytes * elemPos;

    bool    passed = false;
    int32_t idx = 0;
    while (idx < pColFilter->numOfFilters && !passed) {
      SColumnFilterElem *pCond = &pColFilter->pFilters[idx++];

      if (isNull(pValue, pColFilter->info.type)) {
        passed = (pCond->fp == isNull_filter);
      } else if (pCond->fp == notNull_filter) {
        passed = true;
      } else if (pCond->fp != isNull_filter) {
        if (pCond->fp(pCond, pValue, pValue)) {
          passed = true;
        }
      }
    }

    if (!passed) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output columns of the query.
 *
 * When the query has a main output (hasMainOutput()), the ts, tag and tagprj functions are
 * ignored, since they cannot decide the number of output rows.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pExpr1[j].base.functionId;

    bool auxiliary =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!(hasMainFunction && auxiliary)) {
      SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
      if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
        maxOutput = pResInfo->numOfRes;
      }
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

266 267 268 269 270
/*
 * The number of results has to be updated because the offset value was updated.
 * Every output column except ts, tag, tagprj and ts_dummy gets its numOfRes set to numOfRes;
 * the previous value is asserted to be larger than the new one.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;

    bool keepAsIs = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG ||
                     functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TS_DUMMY);
    if (!keepAsIs) {
      assert(pResInfo->numOfRes > numOfRes);
      pResInfo->numOfRes = numOfRes;
    }
  }
}

H
Haojun Liao 已提交
286
/* Map a group index to a result id: 20000000 plus 10000 per group. */
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t base = 20000000;
  const int32_t stride = 10000;
  return base + (groupIndex * stride);
}

/*
 * Tell whether the group-by expression contains a normal column.
 * Returns false for a NULL expression or one without group columns.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  int32_t idx = 0;
  while (idx < pGroupbyExpr->numOfGroupCols) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx++);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pCol->colIndex > 0);
    }

    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression, looked up by
 * column id in pQuery->colList. TSDB_DATA_TYPE_NULL is returned when there is no such column
 * or its id is not found in the column list.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 is kept as the "no normal group-by column" id
  int32_t groupColId = -2;
  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == groupColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * True when the output list holds at least one TAG_DUMMY/TS_DUMMY function together with at
 * least one function whose aAggs status carries TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    sawDummy = false;
  int32_t selectivityCount = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pExpr1[i].base.functionId;

    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      sawDummy = true;
    } else if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return selectivityCount > 0 && sawDummy;
}

358 359
/* True when every output function is TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ (also for zero outputs). */
bool isProjQuery(SQuery *pQuery) {
  int32_t i = 0;
  while (i < pQuery->numOfOutput) {
    int32_t functId = pQuery->pExpr1[i].base.functionId;
    if (!(functId == TSDB_FUNC_PRJ || functId == TSDB_FUNC_TAGPRJ)) {
      return false;
    }
    ++i;
  }

  return true;
}

H
Haojun Liao 已提交
369
/* True when the first output expression of the query is TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirst = &pQuery->pExpr1[0];
  return pFirst->base.functionId == TSDB_FUNC_TS_COMP;
}
370

371 372 373
/*
 * Apply the LIMIT clause to the rows produced so far.
 *
 * When a positive limit is set and total + rows exceeds it, rec.rows is cut down to the
 * remaining quota, the query status is set to QUERY_COMPLETED and true is returned.
 * Otherwise nothing is changed and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool overLimit = (pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit);
  if (!overLimit) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);
  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any output function of the query is TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;

    // timestamp outputs never decide the answer
    if (functionId != TSDB_FUNC_TS &&
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
403
/*
 * True when the query is a single-output TSDB_FUNC_TS_COMP query (the ts_comp column requires
 * the tag value for the join filter), or when any output expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  if (pQuery->numOfOutput == 1 && pQuery->pExpr1[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    if (TSDB_COL_IS_TAG(pQuery->pExpr1[idx].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

421 422 423 424 425 426 427 428
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
429
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
430
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
431 432
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
433 434
  } else {
    *pColStatis = NULL;
435
  }
436

H
Haojun Liao 已提交
437
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
438 439 440
    return false;
  }

441 442 443
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
444

445 446 447
  return true;
}

H
Haojun Liao 已提交
448
/*
 * Find, or create during the master scan, the result row that belongs to the given key.
 *
 * The lookup key is built from pData/bytes/uid in pRuntimeEnv->keyBuf and searched in
 * pRuntimeEnv->pResultRowHashTable, which maps the key to a slot index in pWindowResInfo.
 * If the key is unknown:
 *   - outside the master scan no row is added and NULL is returned;
 *   - otherwise the slot array is grown when it is full, a new row is taken from the pool,
 *     initialized and registered in the hash table.
 *
 * Errors are reported with longjmp(pRuntimeEnv->env, code):
 *   TSDB_CODE_QRY_OUT_OF_MEMORY        when realloc() or initResultRow() fails,
 *   TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW  when more than MAX_INTERVAL_TIME_WINDOW rows exist.
 *
 * @return the result row of the key, or NULL (unknown key and !masterscan)
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);
  int32_t *p1 =
      (int32_t *)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCapacity = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // the truncated product does not grow for tiny capacities (e.g. 1 * 1.5 -> 1); make sure
      // the slot written below (index == size) always exists
      if (newCapacity <= pWindowResInfo->size) {
        newCapacity = (int64_t)pWindowResInfo->size + 1;
      }

      // keep the old array valid until realloc is known to have succeeded
      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SResultRow **)t;

      // clear the newly added slots
      int32_t inc = (int32_t)newCapacity - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, POINTER_BYTES * inc);

      pWindowResInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = getNewResultRow(pRuntimeEnv->pool);
    pWindowResInfo->pResult[pWindowResInfo->size] = pResult;
    int32_t ret = initResultRow(pResult);
    if (ret != TSDB_CODE_SUCCESS) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes),
                (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the correct time window according to the handled timestamp.
 *
 * The candidate window is the current window of pWindowResInfo, or, if no window is active yet
 * (curIndex == -1), the window starting at prevSKey. If ts lies outside the candidate, the
 * window is recomputed: by truncating ts for 'n'/'y' interval units, otherwise by moving the
 * start key in whole sliding steps until the window covers ts.
 *
 * Window ends are inclusive (ekey = start of the following window - 1) in every branch. For
 * ascending queries ekey is additionally capped at the end of the query range; skey is never
 * capped because it is used as the index value of the time window.
 *
 * @return the time window that contains ts
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // taosTimeAdd yields the start of the following window; subtract 1 for the inclusive end,
      // as done when the window is recomputed below
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // window starts after ts: step backwards by a whole number of sliding steps (rounded up)
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // window ends before ts: step forwards by a whole number of sliding steps (rounded up)
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

H
Haojun Liao 已提交
552
/*
 * Assign a (pageId, rowId) position in the result buffer to a result row that has none yet.
 *
 * The last page registered for tid is reused while it holds fewer than numOfRowsPerPage rows;
 * otherwise (or when tid has no page at all) a new page is requested from the buffer.
 *
 * @return 0 on success or when the row already has a page, -1 when no page could be obtained
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pages = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(pages) != 0) {
    SPageInfo* pLast = getLastPageInfo(pages);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, tid, &pageId);
      if (pPage != NULL) {
        assert(pPage->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, tid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->rowId = (int32_t)(pPage->num++);

    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

H
Haojun Liao 已提交
597
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, SDataBlockInfo* pBockInfo,
598
                                       STimeWindow *win, bool masterscan, bool* newWind) {
599 600
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
601

H
Haojun Liao 已提交
602 603
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, pBockInfo->uid);
  if (pResultRow == NULL) {
604 605 606
    *newWind = false;

    return masterscan? -1:0;
607
  }
608

609
  *newWind = true;
H
Haojun Liao 已提交
610

611
  // not assign result buffer yet, add new result buffer
H
Haojun Liao 已提交
612 613
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, pBockInfo->tid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
614
    if (ret != TSDB_CODE_SUCCESS) {
615 616 617
      return -1;
    }
  }
618

619
  // set time window for current result
H
Haojun Liao 已提交
620 621
  pResultRow->win = (*win);
  setWindowResOutputBufInitCtx(pRuntimeEnv, pResultRow);
622 623 624
  return TSDB_CODE_SUCCESS;
}

625
/* Return the closed flag of the result row stored in the given slot (slot must be in [0, size)). */
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SResultRow *pRow = pWindowResInfo->pResult[slot];
  return pRow->closed;
}

H
Haojun Liao 已提交
630
/*
 * Count the rows, starting at pos and moving in scan direction, that are covered up to ekey.
 *
 * Ascending order searches pData[pos .. numOfRows-1], descending order searches pData[0 .. pos].
 * The step count is derived from the index returned by searchFn and is increased by one when
 * the row at that index equals ekey. The result is asserted to be positive.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;
  const bool asc = (order == TSDB_ORDER_ASC);

  int32_t end = asc ? searchFn((char*) &pData[pos], numOfRows - pos, ekey, order)
                    : searchFn((char *)pData, pos + 1, ekey, order);
  if (end >= 0) {
    // ascending: end is relative to pos; descending: end is an absolute index
    int32_t hitIndex = asc ? (end + pos) : end;
    forwardStep = asc ? end : (pos - end);

    if (pData[hitIndex] == ekey) {
      forwardStep += 1;
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
661
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
662
  SQuery *pQuery = pRuntimeEnv->pQuery;
663 664 665 666 667 668 669
  if (pRuntimeEnv->scanFlag != MASTER_SCAN) {
    return pWindowResInfo->size;
  }

  // for group by normal column query, close time window and return.
  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    closeAllTimeWindow(pWindowResInfo);
670
    return pWindowResInfo->size;
671
  }
672

673
  // no qualified results exist, abort check
674
  int32_t numOfClosed = 0;
675

676
  if (pWindowResInfo->size == 0) {
677
    return pWindowResInfo->size;
678
  }
679

680
  // query completed
H
hjxilinx 已提交
681 682
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
683
    closeAllTimeWindow(pWindowResInfo);
684

685 686 687 688
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
689
    int64_t skey = TSKEY_INITIAL_VAL;
690

691
    for (i = 0; i < pWindowResInfo->size; ++i) {
H
Haojun Liao 已提交
692
      SResultRow *pResult = pWindowResInfo->pResult[i];
693
      if (pResult->closed) {
694
        numOfClosed += 1;
695 696
        continue;
      }
697

698
      TSKEY ekey = pResult->win.ekey;
699
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
700
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
701 702
        closeTimeWindow(pWindowResInfo, i);
      } else {
703
        skey = pResult->win.skey;
704 705 706
        break;
      }
    }
707

708
    // all windows are closed, set the last one to be the skey
709
    if (skey == TSKEY_INITIAL_VAL) {
710 711 712 713 714
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
715

716
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex]->win.skey;
717

718 719
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
720
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
721
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
722

723
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
724
    } else {
725
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
726
             numOfClosed);
727 728
    }
  }
729

730 731 732 733 734
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
735

736
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
737
  return numOfClosed;
738 739 740
}

/*
 * Count the rows of the data block, starting at startPos and moving in scan direction, that
 * belong to the time window ending at ekey.
 *
 * If the window ends inside the block, the count comes from getForwardStepsInBlock(); otherwise
 * all remaining rows of the block in scan direction are counted. With updateLastKey set, the
 * lastKey of the current table (pQuery->current) is moved one step past the last counted row,
 * or past the block boundary when the rest of the block is consumed.
 *
 * @return number of rows in the window (asserted to be positive)
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* pTable = pQuery->current;

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // boundary of the block in scan direction
  const TSKEY blockEdge = asc ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
  const bool  endsInsideBlock = asc ? (ekey < blockEdge) : (ekey > blockEdge);

  int32_t num = -1;
  if (endsInsideBlock) {
    num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    if (updateLastKey) {  // update the last key
      int32_t lastPos = asc ? (startPos + (num - 1)) : (startPos - (num - 1));
      pTable->lastKey = pPrimaryColumn[lastPos] + step;
    }
  } else {
    num = asc ? (pDataBlockInfo->rows - startPos) : (startPos + 1);
    if (updateLastKey) {
      pTable->lastKey = blockEdge + step;
    }
  }

  assert(num > 0);
  return num;
}

H
Haojun Liao 已提交
780 781
/*
 * Apply every output function of the query to a run of forwardStep rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is already closed. For each
 * function context the start timestamp, row count and start offset are set (for descending
 * queries the offset is moved back to the first row of the run), selectivity functions get the
 * matching slice of the timestamp column, and the pre-aggregated statistics are disabled for
 * the call when the run does not cover the whole block (forwardStep < numOfTotal).
 *
 * NOTE(review): after each call preAggVals.isSet is restored to the value pCtx[0] had on
 * entry -- this presumes all contexts shared that value; confirm.
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  bool hasPrev = pCtx[0].preAggVals.isSet;

  if (!(IS_MASTER_SCAN(pRuntimeEnv) || closed)) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOne = &pCtx[k];

    pOne->nStartQueryTimestamp = pWin->skey;
    pOne->size = forwardStep;
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pOne->startOffset = offset;
    } else {
      pOne->startOffset = offset - (forwardStep - 1);
    }

    int32_t functionId = pQuery->pExpr1[k].base.functionId;
    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pOne->ptsList = &tsCol[pOne->startOffset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    // NOTE: the original value of isSet have been changed here
    if (pOne->preAggVals.isSet && forwardStep < numOfTotal) {
      pOne->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pOne, functionId)) {
      aAggs[functionId].xFunction(pOne);
    }

    // restore it
    pOne->preAggVals.isSet = hasPrev;
  }
}

814
/*
 * Feed one row of the current data block to every output function of the query.
 * The row is only processed during the master scan or when the window result is closed.
 *
 * @param pRuntimeEnv  runtime environment, supplies the query and the function contexts
 * @param closed       status of the time window result, as reported by the caller
 * @param pWin         time window the row belongs to; its skey is stored as nStartQueryTimestamp
 * @param offset       position of the row inside the block
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[idx];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t funcId = pQuery->pExpr1[idx].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, funcId)) {
      aAggs[funcId].xFunctionF(pFuncCtx, offset);
    }

    ++idx;
  }
}

H
Haojun Liao 已提交
830 831
/*
 * Move pNext to the following time window and find the row of the current block at which
 * that window starts.
 *
 * If the first candidate row lies beyond the new window (the window covers no data, which
 * may happen when the time window is too small), pNext is moved further so that it contains
 * that row.
 *
 * @param pRuntimeEnv    runtime environment, supplies the query
 * @param pNext          in: current time window, out: next qualified time window
 * @param pDataBlockInfo description of the current data block (time range, number of rows)
 * @param primaryKeys    primary timestamp column of the block, may be NULL
 * @param searchFn       search function used to locate the start key in primaryKeys
 * @param prevPosition   last row position of the previous window, or -1 if there is none
 * @return               start position in the block, or -1 if the next window is not in this block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  getNextTimeWindow(pQuery, pNext);

  const bool  asc = QUERY_IS_ASC_QUERY(pQuery);
  const TSKEY blockSKey = pDataBlockInfo->window.skey;
  const TSKEY blockEKey = pDataBlockInfo->window.ekey;

  // next time window is not in current block
  bool outsideBlock = asc ? (pNext->skey > blockEKey) : (pNext->ekey < blockSKey);
  if (outsideBlock) {
    return -1;
  }

  // the key to start from is the leading edge of the window, clipped by the start of the query range
  TSKEY startKey = asc ? pNext->skey : pNext->ekey;
  if (asc) {
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  int32_t startPos = 0;

  // tumbling time window query, a special case of sliding time window query
  bool tumbling = (pQuery->interval.sliding == pQuery->interval.interval);
  if (tumbling && prevPosition != -1) {
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (asc && startKey <= blockSKey) {
    startPos = 0;
  } else if (!asc && startKey >= blockEKey) {
    startPos = pDataBlockInfo->rows - 1;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  // without the timestamp column only the overlap with the block can be verified
  if (primaryKeys == NULL) {
    if (asc) {
      assert(blockSKey <= pNext->ekey);
    } else {
      assert(blockEKey >= pNext->skey);
    }

    return startPos;
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  const TSKEY next = primaryKeys[startPos];
  bool emptyWindow = asc ? (next > pNext->ekey) : (next < pNext->skey);
  if (!emptyWindow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    // 'n' and 'y' interval units: rebuild the window from the timestamp itself
    pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (asc) {
    // advance by the smallest number of sliding steps that makes the window reach 'next'
    pNext->ekey += ((next - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    pNext->skey -= ((pNext->skey - next + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
906
/*
 * Return the key at which the scan of a time window has to stop: the trailing edge of the
 * window in scan direction, clipped so that it does not go past pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  // descending scan: the window is left through its skey
  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
923 924
// Linear lookup of the data of the column with the given column id in a loaded data block.
// Returns NULL if the block holds no such column.
// todo binary search
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
938
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
939 940 941
  if (pDataBlock == NULL) {
    return NULL;
  }
942

H
Haojun Liao 已提交
943
  char *dataBlock = NULL;
H
Haojun Liao 已提交
944
  SQuery *pQuery = pRuntimeEnv->pQuery;
945

H
Haojun Liao 已提交
946
  int32_t functionId = pQuery->pExpr1[col].base.functionId;
947
  if (functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
948
    sas->pArithExpr = &pQuery->pExpr1[col];
949

950 951 952 953
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
954

H
Haojun Liao 已提交
955 956 957 958
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

959
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
960
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
961
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
962
      SColumnInfo *pColMsg = &pQuery->colList[i];
963

964 965 966 967 968 969 970 971
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
972

973
      assert(dataBlock != NULL);
974
      sas->data[i] = dataBlock;  // start from the offset
975
    }
976

977
  } else {  // other type of query function
H
Haojun Liao 已提交
978
    SColIndex *pCol = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
979
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
980
      SColIndex* pColIndex = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
981 982 983 984
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
985 986
    } else {
      dataBlock = NULL;
987 988
    }
  }
989

990 991 992 993
  return dataBlock;
}

/**
H
Haojun Liao 已提交
994
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
995 996
 * @param pRuntimeEnv
 * @param forwardStep
997
 * @param tsCols
998 999 1000 1001 1002
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
1003
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
1004 1005
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
1006
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1007 1008
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

1009 1010
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
1011
  if (pDataBlock != NULL) {
1012
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
1013
    tsCols = (TSKEY *)(pColInfo->pData);
1014
  }
1015

1016
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1017 1018 1019
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1020

H
Haojun Liao 已提交
1021
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1022
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1023
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1024
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1025
  }
1026

1027
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1028
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1029
    TSKEY ts = TSKEY_INITIAL_VAL;
1030

H
Haojun Liao 已提交
1031 1032 1033 1034 1035 1036 1037 1038
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1039
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1040
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
1041
      tfree(sasArray);
H
hjxilinx 已提交
1042
      return;
1043
    }
1044

H
Haojun Liao 已提交
1045 1046 1047
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1048
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1049
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1050
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1051

1052
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1053
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1054
    }
1055

1056 1057
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1058

1059
    while (1) {
H
Haojun Liao 已提交
1060 1061
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1062 1063 1064
      if (startPos < 0) {
        break;
      }
1065

1066
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1067
      hasTimeWindow = false;
H
Haojun Liao 已提交
1068
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1069 1070
        break;
      }
1071

1072 1073 1074 1075 1076
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1077
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1078

1079 1080
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1081
    }
1082

1083 1084 1085 1086 1087 1088 1089
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1090
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1091
      int32_t functionId = pQuery->pExpr1[k].base.functionId;
1092 1093 1094 1095 1096
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1097

1098
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1099
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1100 1101
      continue;
    }
1102

S
TD-1848  
Shengliang Guan 已提交
1103
    tfree(sasArray[i].data);
1104
  }
1105

S
TD-1848  
Shengliang Guan 已提交
1106
  tfree(sasArray);
1107 1108
}

1109
/*
 * Select the result row of the group identified by one value of the group-by column and make
 * it the output buffer of the function contexts.
 *
 * @param pRuntimeEnv  runtime environment, supplies the result buffer and the window results
 * @param pData        value of the group-by column in the current row
 * @param type         data type of the group-by column
 * @param bytes        width of the group-by column; for binary/nchar the actual length of the
 *                     value is used as key length instead
 * @param groupIndex   handed to doPrepareResultRowFromKey as the uid of the result row
 * @return             TSDB_CODE_SUCCESS, or -1 if the value is NULL, no result row is available
 *                     or no result page could be assigned to the row
 *
 * Group by on float/double columns is not supported and aborts the query by long jump.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // not assign result buffer yet, add new result buffer
  char* d = pData;
  int16_t len = bytes;
  if (type == TSDB_DATA_TYPE_BINARY||type == TSDB_DATA_TYPE_NCHAR) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  uint64_t uid = groupIndex;  // the group index of the table is used as uid of the result row
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, uid);
  if (pResultRow == NULL) {
    return -1;
  }

  int64_t v = -1;
  switch(type) {
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
    case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
    case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
    case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
    default: break;
  }

  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    /*
     * This function runs for every row, so the result row of an already known group comes
     * back with its key in place. Copy the key only once, otherwise the previous copy leaks.
     * NOTE(review): relies on a newly created result row having key == NULL - confirm against
     * the result row allocation.
     */
    if (pResultRow->key == NULL) {
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pResultRow->key, pData);
    }
  } else {
    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1166
/*
 * Find the data of the normal (non-tag) group-by column in a loaded data block.
 *
 * @param pQuery      the query, supplies the group-by expression and the column list
 * @param type        out: data type of the group-by column
 * @param bytes       out: width of the group-by column
 * @param pDataBlock  loaded column data
 * @return            data of the group-by column, NULL if the block does not contain it
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // position of the group-by column in the column list of the query
    int16_t colIndex = -1;
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pQuery->colList[i].colId == pGroupCol->colId) {
        colIndex = (int16_t)i;
        break;
      }
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

    int32_t c = 0;
    while (c < numOfBlockCols) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, c);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }

      ++c;
    }
  }

  return NULL;
}

/*
 * Compare the row at the given offset with the current element of the timestamp buffer.
 *
 * @param pRuntimeEnv  runtime environment, supplies the timestamp buffer and function contexts
 * @param offset       position of the row in the input buffer of the first function context
 * @return             TS_JOIN_TAG_NOT_EQUALS if the tag of the first context differs from the
 *                     tag of the element, TS_JOIN_TS_NOT_EQUALS if the row has not reached the
 *                     timestamp of the element yet, TS_JOIN_TS_EQUAL otherwise
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is handed to tVariantCompare next to &pCtx[0].tag, so it is accessed as a pointer here
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, (uint64_t)elem.tag->i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  // a row that is already past the element in scan direction is not expected here
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether a function has to be invoked for the current input.
 *
 * @param pRuntimeEnv  runtime environment, supplies the query and the scan flag
 * @param pCtx         context of the function
 * @param functionId   id of the function
 * @return             true if the function should be executed
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pCellInfo = GET_RES_INFO(pCtx);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // a completed result and the dummy functions never consume input
  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (pCellInfo->complete || isDummy) {
    return false;
  }

  // first/first_dst are only evaluated in an ascending scan
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // last/last_dst are only evaluated when the scan order equals the order stored in param[0]
  if (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // all remaining functions are skipped in the reverse scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1273 1274
/*
 * Apply the output functions of the query to a data block one row at a time.
 *
 * Every row is first checked against the timestamp buffer (if there is one) and the column
 * filters. A qualified row is then fed to all time windows that contain it for an interval
 * query, or to the functions directly (after the result row of its group has been selected
 * in case of group by on a normal column).
 *
 * @param pRuntimeEnv     runtime environment, supplies the query and the function contexts
 * @param pStatis         pre-aggregated statistics of the block, handed to setExecParams
 * @param pDataBlockInfo  description of the data block (time range, number of rows)
 * @param pWindowResInfo  time window results of the query
 * @param pDataBlock      loaded column data
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pTableInfo = pQuery->current;

  const bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
  const bool groupbyCol = pRuntimeEnv->groupbyNormalCol;

  // the first column only serves as timestamp column if it has the timestamp type
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY*) pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupbyCol) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
  }

  // set the input column data
  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[f];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t joinRes = doTSJoinFilter(pRuntimeEnv, offset);
      if (joinRes == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (joinRes == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(joinRes == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow);
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {  // null data, too many state code
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // the row also belongs to the following windows as long as they contain its timestamp
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      for (;;) {
        getNextTimeWindow(pQuery, &nextWin);

        bool pastQueryRange = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey > pQuery->window.ekey)
                                                         : (nextWin.ekey < pQuery->window.ekey);
        if (pastQueryRange) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyCol) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, pTableInfo->groupIndex);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t funcId = pQuery->pExpr1[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], funcId)) {
          aAggs[funcId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  // remember where the scan of this table has to be resumed
  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else {
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    pTableInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // release the buffers allocated by getDataBlock for arithmetic expressions
  // todo refactor: extract method
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pExpr1[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1439
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1440
  SQuery *pQuery = pRuntimeEnv->pQuery;
1441

H
hjxilinx 已提交
1442 1443
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1444

H
Haojun Liao 已提交
1445
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1446
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1447
  } else {
1448
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1449
  }
1450

1451
  // update the lastkey of current table
1452
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1453
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1454

1455
  // interval query with limit applied
1456
  int32_t numOfRes = 0;
1457
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
1458 1459
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1460
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1461

1462 1463 1464 1465
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1466

1467 1468 1469
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1470

1471 1472 1473
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1474 1475 1476 1477 1478

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1479
    }
1480
  }
1481

1482
  return numOfRes;
1483 1484
}

H
Haojun Liao 已提交
1485
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1486
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1487

H
Haojun Liao 已提交
1488 1489
  int32_t functionId = pQuery->pExpr1[colIndex].base.functionId;
  int32_t colId = pQuery->pExpr1[colIndex].base.colInfo.colId;
1490

1491
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1492
  pCtx->hasNull = hasNullValue(&pQuery->pExpr1[colIndex].base.colInfo, pStatis, &tpField);
1493
  pCtx->aInputElemBuf = inputData;
1494

1495
  if (tpField != NULL) {
H
Haojun Liao 已提交
1496
    pCtx->preAggVals.isSet  = true;
1497 1498
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1499 1500 1501
  } else {
    pCtx->preAggVals.isSet = false;
  }
1502

H
Haojun Liao 已提交
1503 1504
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1505 1506
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1507

H
Haojun Liao 已提交
1508
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1509 1510
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1511

1512 1513
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1514
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1515
  }
1516

1517 1518 1519 1520 1521
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1522
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1523
    /*
H
Haojun Liao 已提交
1524
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
H
Haojun Liao 已提交
1525
     * timestamp column, and the y-value is the column specified in pQuery->pExpr1[i].colIdxInBuffer
1526 1527 1528 1529 1530
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
H
Haojun Liao 已提交
1531 1532
      SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);
      STwaInfo *pTWAInfo = (STwaInfo*) GET_ROWCELL_INTERBUF(pInfo);
1533 1534 1535
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1536

1537 1538
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1539 1540 1541 1542 1543 1544
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1545
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1546 1547 1548
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1549
    pInterpInfo->type = (int8_t)pQuery->fillType;
1550 1551
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1552

1553 1554 1555 1556
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1557 1558 1559
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1560 1561
      }
    }
H
Haojun Liao 已提交
1562 1563 1564
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1565
  }
1566

1567 1568 1569 1570 1571 1572
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1573
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1574 1575 1576
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1577
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1578 1579 1580 1581 1582 1583
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1584
/*
 * For a "selectivity function + tags" query, gather the contexts of all
 * TAG_DUMMY / TS_DUMMY output columns and attach that list to the context of
 * the selectivity function (the last one found, if there are several).
 *
 * @param pRuntimeEnv runtime environment providing the query description
 * @param pCtx        array of function contexts, one per output column
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when the list
 *         of tag contexts cannot be allocated
 */
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // nothing to bind unless the query mixes a selectivity function with tags
  if (!isSelectivityWithTagsQuery(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pTagCtx == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  int32_t         numOfTagCols = 0;
  int16_t         totalTagBytes = 0;
  SQLFunctionCtx *pSelectivityCtx = NULL;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      totalTagBytes += pCtx[i].outputBytes;
      pTagCtx[numOfTagCols++] = &pCtx[i];
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pSelectivityCtx = &pCtx[i];
    }

    // Everything else is left untouched: TSDB_FUNC_TAG may be the group-by tag
    // column, TSDB_FUNC_TS the required primary timestamp column, and a normal
    // (group-by) column shows up as TSDB_FUNC_PRJ.
  }

  if (pSelectivityCtx == NULL) {
    // no selectivity function owns the list, so release it
    tfree(pTagCtx);
  } else {
    pSelectivityCtx->tagInfo.pTagCtxList = pTagCtx;
    pSelectivityCtx->tagInfo.numOfTagCols = numOfTagCols;
    pSelectivityCtx->tagInfo.tagsLen = totalTagBytes;
  }

  return TSDB_CODE_SUCCESS;
}

1625
/*
 * Allocate and initialise the per-output-column function contexts of a query.
 *
 * For every output expression the input/output type information, the function
 * parameters and the offsets of the result cells are filled in. Afterwards the
 * tag contexts of a selectivity+tags query are bound.
 *
 * @param pRuntimeEnv runtime environment; its pCtx, offset and rowCellInfoOffset
 *                    arrays are created here
 * @param order       value stored in param[2] of top/bottom/diff contexts
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 *         fails; in that case everything created here is released again
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->offset = calloc(pQuery->numOfOutput, sizeof(int16_t));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));

  if (pRuntimeEnv->offset == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // remember the "null required" request and strip the flag from the column
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type/width from the tag, user-defined or normal column
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes  = pQuery->pExpr1[i].bytes;
    pCtx->outputType   = pQuery->pExpr1[i].type;

    pCtx->order        = pQuery->order.order;
    pCtx->functionId   = pSqlFuncMsg->functionId;
    pCtx->stableQuery  = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pExpr1[i].interBytes;

    // copy the function arguments; string arguments are referenced through pz
    pCtx->numOfParams  = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pExpr1[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offsets accumulate the widths of all preceding output columns / cells
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pExpr1[i - 1].interBytes;
    }
  }

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Variant parameters may already have been created for some contexts (the
  // failure can come from setCtxTagColumnInfo after the loop above), so destroy
  // them before the context array goes away. Untouched contexts are zeroed by
  // calloc and therefore have numOfParams == 0.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  tfree(pRuntimeEnv->pCtx);
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747
/*
 * Clean up both tsdb query handles held by the runtime environment of pQInfo
 * and reset the stored handle pointers. The memory reference of the query is
 * expected to be fully released at this point (checked by assertion).
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;

  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);

  pEnv->pQueryHandle = NULL;
  pEnv->pSecQueryHandle = NULL;

  assert(pQInfo->memRef.ref == 0 && pQInfo->memRef.imem == NULL && pQInfo->memRef.mem == NULL);
}

1748 1749 1750 1751
/*
 * Release everything owned by the query runtime environment: time window
 * info, function contexts (with their variant parameters and tag context
 * lists), fill info, result buffer, query handles, ts buffer, the helper
 * arrays, the result-row hash table and the result-row pool.
 *
 * Does nothing when the environment has no query attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[col];

      int32_t p = 0;
      while (p < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[p]);
        ++p;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);

  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

H
Haojun Liao 已提交
1791
// evaluates to true when the code field of _q equals TSDB_CODE_TSC_QUERY_CANCELLED
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1792

H
Haojun Liao 已提交
1793
// flag the query as cancelled by storing TSDB_CODE_TSC_QUERY_CANCELLED in its code field
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1794

H
Haojun Liao 已提交
1795 1796 1797
/*
 * Decide whether the query is treated as a fixed-output query.
 *
 * Interval queries never are; top/bottom and group-by-normal-column queries
 * always are. Otherwise the query qualifies as soon as one output function
 * (ignoring a leading timestamp projection and ts/ts_dummy columns) is not a
 * multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[i].base;

    // ignore the ts_comp function
    bool leadingTsProjection = (i == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    if (leadingTsProjection) {
      continue;
    }

    bool timestampColumn = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (timestampColumn) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1827
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1828
// true if at least one output column of the query uses TSDB_FUNC_INTERP
static bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }
    ++idx;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1840
// true if any output column uses sum_rate, sum_irate, avg_rate or avg_irate
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    // the timestamp column never decides the outcome
    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    bool isSumRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE);
    bool isAvgRate = (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);
    if (isSumRate || isAvgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1856
// true if at least one output column of the query uses TSDB_FUNC_LAST_ROW
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }
    ++idx;
  }

  return false;
}

H
hjxilinx 已提交
1867
/*
 * Check whether a second scan in the opposite direction is required.
 *
 * That is the case when a first/first_dst function runs in a non-ascending
 * query, or when the scan order stored in the first argument of a
 * last/last_dst function differs from the order of the query.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[i].base;
    int32_t      fid = pFunc->functionId;

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the scan order to acquire the last result of the specified column
      int32_t requiredOrder = (int32_t)pFunc->arg->argValue.i64;
      if (requiredOrder != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
1889

H
Haojun Liao 已提交
1890 1891 1892 1893
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1894 1895
/*
 * true when every output column is one of: tagprj, tid_tag, count(tbname),
 * or a projection of a user-defined constant column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SExprInfo *pExpr = &pQuery->pExpr1[i];
    int32_t    fid = pExpr->base.functionId;

    bool tagLike = (fid == TSDB_FUNC_TAGPRJ) ||
                   (fid == TSDB_FUNC_TID_TAG) ||
                   (fid == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
                   (fid == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExpr->base.colInfo.flag));

    if (!tagLike) {
      return false;
    }
  }

  return true;
}

1911 1912
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1913
/*
 * Compute the interval-aligned time window that contains key.
 *
 * @param pQuery   query providing interval, sliding and precision
 * @param key      timestamp to locate; must lie within [keyFirst, keyLast]
 * @param keyFirst lower bound of the query range
 * @param keyLast  upper bound of the query range
 * @param win      output: skey is key truncated by the interval, ekey the
 *                 last timestamp of that window
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);

  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * If keyFirst > INT64_MAX - interval, the whole query range is shorter than
   * one interval, so the window is simply capped at INT64_MAX instead of being
   * computed by an addition.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  // intervals given in unit 'n' or 'y' are added through taosTimeAdd
  bool calendarUnit = (pQuery->interval.intervalUnit == 'n') || (pQuery->interval.intervalUnit == 'y');
  if (calendarUnit) {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else {
    win->ekey = win->skey + pQuery->interval.interval - 1;
  }
}

/*
 * Set pQuery->checkBuffer.
 *
 * It is 0 for top/bottom and group-by-normal-column queries. Otherwise the
 * output functions (skipping ts/ts_dummy) are inspected up to the first one
 * that is not multi-output; checkBuffer becomes 1 only if no such function
 * is found and at least one multi-output function exists.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  if (multiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1957
/*
 * Validate the list of columns to load: two neighbouring entries of colList
 * must not carry the same column id.
 *
 * @return false (after logging an error) if such a duplicate exists, true otherwise
 */
bool colIdCheck(SQuery *pQuery) {
  int32_t lastPair = pQuery->numOfCols - 1;

  for (int32_t cur = 0; cur < lastPair; ++cur) {
    int32_t next = cur + 1;

    if (pQuery->colList[cur].colId != pQuery->colList[next].colId) {
      continue;
    }

    // NOTE(review): GET_QINFO_ADDR subtracts offsetof(SQInfo, runtimeEnv), but a
    // SQuery pointer is passed here, so the logged address is probably not the
    // real QInfo address - confirm.
    qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
    return false;
  }

  return true;
}

// todo: ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * true when every output function, apart from ts/ts_dummy/tag/tag_dummy
 * columns, is either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    bool auxiliaryColumn = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) ||
                           (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAG_DUMMY);
    if (auxiliaryColumn) {
      continue;
    }

    bool matches = (fid == functId) || (fid == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

// true when the query consists of first/first_dst functions only (see onlyOneQueryType)
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// true when the query consists of last/last_dst functions only (see onlyOneQueryType)
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1992
// todo refactor, add iterator
1993 1994
/*
 * Walk over every table of every group in pQInfo->tableGroupInfo and move
 * the table's lastKey to win->skey whenever it currently equals win->ekey.
 * Callers invoke this right after swapping skey/ekey of the query window.
 */
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t numOfGroups = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);

  for (size_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);
    size_t  numOfTables = taosArrayGetSize(pGroup);

    for (size_t t = 0; t < numOfTables; ++t) {
      STableKeyInfo *pKeyInfo = taosArrayGet(pGroup, t);

      // a table still positioned at the old start key follows the window to its new start key
      if (pKeyInfo->lastKey != win->ekey) {
        continue;
      }

      pKeyInfo->lastKey = win->skey;
    }
  }
}

2010
/*
 * Adjust the scan order (and, where needed, swap the query time window) for
 * query types that require a particular direction:
 *   - last_row queries and descending group-by-normal-column queries scan ascending,
 *   - point interpolation queries without interval scan ascending,
 *   - queries made of first() only scan ascending, queries made of last() only
 *     scan descending; for interval queries this applies to super table
 *     queries only.
 *
 * @param pQInfo      query info whose runtimeEnv.pQuery is updated in place
 * @param pQueryMsg   not used by this function
 * @param stableQuery true for a super table query
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // in case of point-interpolation query, use asc order scan
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // The log statements below print pQInfo directly: GET_QINFO_ADDR() is only
  // valid for the address of SQInfo.runtimeEnv and yields a bogus pointer when
  // applied to pQuery.
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Number of result pages to start with:
 *   - 128 for group-by-normal-column queries,
 *   - max(number of tables, 16) for interval queries,
 *   - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;
  int32_t pages = 0;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, minPagesForInterval));
  } else {
    // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(pages > 0);
  return pages;
}

2120 2121
/*
 * Determine the page size and row size of the intermediate result buffer.
 *
 * @param pRuntimeEnv runtime environment; numOfRowsPerPage is updated
 * @param ps          output: page size in bytes, starting at
 *                    DEFAULT_INTERN_BUF_PAGE_SIZE and doubled until a page
 *                    (minus the tFilePage header) holds at least 4 rows
 * @param rowsize     output: size of one result row in bytes
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t minRowsPerPage = 4;
  int32_t headerBytes = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // grow the page until the minimum number of rows fits behind the page header
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * minRowsPerPage) > (*ps) - headerBytes; *ps = (*ps << 1u)) {
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2137
// true for every data type except BINARY and NCHAR; needToLoadDataBlock only pre-filters such columns by block statistics
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2138

H
Haojun Liao 已提交
2139 2140 2141 2142
/*
 * Use the pre-computed block statistics to decide whether the data block
 * has to be loaded.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param pDataStatis per-column statistics of the block, may be NULL
 * @param pCtx        function contexts, one per output column
 * @param numOfRows   number of rows in the block
 * @return true if the block must be loaded: no statistics available, no
 *         filter to evaluate (and not a top/bottom query), a filter column
 *         without usable statistics, or a filter / top-bottom check that
 *         accepts the [min, max] range of the block; false otherwise
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && !pRuntimeEnv->topBotQuery) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[k];

    // find the statistics entry that belongs to the filtered column
    int32_t statIdx = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pColFilter->info.colId) {
        statIdx = c;
        break;
      }
    }

    // no statistics data, or a binary/nchar column that cannot be pre-filtered: load the block
    if (statIdx == -1 || !IS_PREFILTER_TYPE(pColFilter->info.type)) {
      return true;
    }

    SDataStatis *pStat = &pDataStatis[statIdx];

    // all data in current column are NULL, no need to check its boundary value
    if (pStat->numOfNull == numOfRows) {
      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pColFilter->numOfFilters; ++j) {
        if (pColFilter->pFilters[j].fp == isNull_filter) {
          return true;
        }
      }

      continue;
    }

    // float columns are compared through float copies of the stored min/max values
    float fMin = 0;
    float fMax = 0;
    char *pLower = (char *)&pStat->min;
    char *pUpper = (char *)&pStat->max;

    if (pColFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      fMin = (float)(*(double *)(&pStat->min));
      fMax = (float)(*(double *)(&pStat->max));
      pLower = (char *)&fMin;
      pUpper = (char *)&fMax;
    }

    for (int32_t j = 0; j < pColFilter->numOfFilters; ++j) {
      SColumnFilterElem *pElem = &pColFilter->pFilters[j];
      if (pElem->fp(pElem, pLower, pUpper)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    // the first top/bottom function decides on its own
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t fid = pQuery->pExpr1[i].base.functionId;
      if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], fid, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2213 2214 2215 2216 2217 2218 2219 2220
/*
 * Check whether a boundary of an interval time window falls inside the data
 * block, i.e. whether the block is spread over more than one time window.
 *
 * Starting from the window aligned to the first key of the block in scan
 * direction, successive windows are produced with getNextTimeWindow until
 * they lie completely past the block.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  TSKEY blockStart = pBlockInfo->window.skey;
  TSKEY blockEnd = pBlockInfo->window.ekey;

  TSKEY rangeStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY rangeEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  STimeWindow w = {0};

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockStart, rangeStart, rangeEnd, &w);
    assert(w.ekey >= blockStart);

    // the first window already ends inside the block
    if (w.ekey < blockEnd) {
      return true;
    }

    for (;;) {
      getNextTimeWindow(pQuery, &w);
      if (w.skey > blockEnd) {
        break;
      }

      assert(w.ekey > blockEnd);
      if (w.skey <= blockEnd && w.skey > blockStart) {
        return true;
      }
    }

    return false;
  }

  // descending scan: mirror image of the logic above, anchored at the block's last key
  getAlignQueryTimeWindow(pQuery, blockEnd, rangeStart, rangeEnd, &w);
  assert(w.skey <= blockEnd);

  if (w.skey > blockStart) {
    return true;
  }

  for (;;) {
    getNextTimeWindow(pQuery, &w);
    if (w.ekey < blockStart) {
      break;
    }

    assert(w.skey < blockStart);
    if (w.ekey < blockEnd && w.ekey >= blockStart) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
2262
/*
 * Decide how much of the current data block is required by the query and
 * load exactly that.
 *
 * @param pRuntimeEnv    runtime environment of the query
 * @param pWindowResInfo time window result info used to set the output buffer of interval queries
 * @param pQueryHandle   tsdb query handle positioned at the block
 * @param pBlockInfo     description (time range, number of rows) of the block
 * @param pStatis        output: block statistics, filled when they are retrieved
 * @param pDataBlock     output: the loaded column data, filled when the block is loaded
 * @param status         output: one of BLK_DATA_NO_NEEDED, BLK_DATA_STATIS_NEEDED,
 *                       BLK_DATA_ALL_NEEDED or BLK_DATA_DISCARD
 * @return TSDB_CODE_SUCCESS, or terrno when loading a fully required block fails
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  // filters and ts-comp buffers always need the complete block
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // let every function state its requirement; stop once one needs all data
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pExpr1[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;

      // a discarded block is neither loaded nor counted as loaded/checked
      return TSDB_CODE_SUCCESS;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2347
/*
 * Binary search in a list of timestamps.
 *
 * @param pValue buffer holding num TSKEY values; the search assumes them to be
 *               sorted in ascending order
 * @param num    number of keys in the buffer
 * @param key    timestamp to look for
 * @param order  TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return for TSDB_ORDER_DESC the position of the closest key that is <= key
 *         (-1 if every key is larger); for TSDB_ORDER_ASC the position of the
 *         closest key that is >= key (-1 if every key is smaller); -1 when
 *         num <= 0
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }
      if (key == keys[lo]) {
        return lo;
      }
      if (key < keys[lo]) {
        return lo - 1;
      }

      mid = ((hi - lo + 1) >> 1) + lo;

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        break;
      }
    }
  } else {
    // find the first position which is bigger than the key
    for (;;) {
      if (key <= keys[lo]) {
        return lo;
      }
      if (key == keys[hi]) {
        return hi;
      }

      if (key > keys[hi]) {
        // the answer is the element right after hi, if there is one
        int32_t next = hi + 1;
        return (next >= num) ? -1 : next;
      }

      mid = ((hi - lo + 1) >> 1) + lo;

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        break;
      }
    }
  }

  // exact match found at mid
  return mid;
}

2410 2411 2412 2413 2414 2415 2416 2417
/*
 * Grow every output column buffer of the query so that it can hold `capacity` rows.
 *
 * Nothing is done when `capacity` is smaller than the current capacity. On allocation
 * failure the function does not return: it longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY. After a successful resize the output pointer of each
 * function context is reset to the start of the data area of its buffer.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity may not fit in a 32-bit int
    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2438 2439 2440
/*
 * Make sure the output buffers can accommodate all rows of the given data block.
 *
 * Only applies when the query is not an interval query, is not grouped by a normal
 * column, is not a fixed-output query and is not a ts-comp query. When the free room
 * (capacity - rows) is smaller than the number of rows in the block, every column
 * buffer is enlarged by the missing amount, the newly exposed tail is zeroed, and the
 * output pointer of each function context is moved to the first unused row.
 *
 * On allocation failure the function longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol && !isFixedOutputQuery(pRuntimeEnv) && !isTSCompQuery(pQuery)) {
    SResultRec *pRec = &pQuery->rec;

    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
      int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
      int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        int32_t bytes = pQuery->pExpr1[i].bytes;
        assert(bytes > 0 && newSize > 0);

        // compute the size in size_t: bytes * newSize may not fit in a 32-bit int
        size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

        char *tmp = realloc(pQuery->sdata[i], allocSize);
        if (tmp == NULL) {  // todo handle the oom
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        } else {
          // clear everything behind the rows that are already produced
          memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
          pQuery->sdata[i] = (tFilePage *)tmp;
        }

        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

        // these functions also write through ptsOutputBuf, which follows the output of the first column
        int32_t functionId = pQuery->pExpr1[i].base.functionId;
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }

      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
             newSize, pRec->capacity, newSize - pRec->rows);

      pRec->capacity = newSize;
    }
  }
}

2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497
/*
 * Initialize the first time window of an interval query from the first data block.
 *
 * Does nothing unless the query is an interval query whose prevSKey still equals
 * TSKEY_INITIAL_VAL. The aligned window is computed by getAlignQueryTimeWindow();
 * its start key becomes prevSKey. startTime is the aligned start key for ascending
 * queries and the query start key otherwise.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->windowResInfo.prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;
  STimeWindow     aligned = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    TSKEY blockStart = pBlockInfo->window.skey;

    getAlignQueryTimeWindow(pQuery, blockStart, blockStart, pQuery->window.ekey, &aligned);
    pInfo->startTime = aligned.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    TSKEY blockEnd = pBlockInfo->window.ekey;

    getAlignQueryTimeWindow(pQuery, blockEnd, pQuery->window.ekey, blockEnd, &aligned);
    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = aligned.skey;
}

2498 2499
/*
 * Walk through the data blocks delivered by the query handle of the current scan and
 * apply the query functions on each of them.
 *
 * The loop stops when there are no more blocks, when loading a block fails, or when
 * the query status reports a full result buffer or completion. A killed query or a
 * non-success terrno leaves the function through longjmp() on pRuntimeEnv->env.
 * If the result buffer is not full afterwards, the query is marked as completed.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  // the master scan reads from the primary handle, every other scan from the secondary one
  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pHandle, &blockInfo, &pStatis,
                                         &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped: move lastKey just past the far edge of the block
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && (IS_MASTER_SCAN(pRuntimeEnv) || pRuntimeEnv->scanFlag == REPEAT_SCAN)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2578
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2579
  tVariantDestroy(tag);
2580

2581
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2582
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2583
    assert(val != NULL);
2584

H
[td-90]  
Haojun Liao 已提交
2585
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2586
  } else {
2587
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2588 2589 2590 2591
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2592

H
hjxilinx 已提交
2593
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2594
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2595 2596 2597 2598
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2599
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2600
    } else {
H
Haojun Liao 已提交
2601 2602 2603 2604 2605
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2606
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2607
    }
2608
  }
2609 2610
}

2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622
/*
 * Linear lookup of a tag column descriptor by column id.
 *
 * pTagColList must not be NULL and numOfTags must be positive (asserted).
 * Returns a pointer to the first entry whose colId matches, or NULL if none does.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo* pEnd = pTagColList + numOfTags;
  for (SColumnInfo* pCol = pTagColList; pCol != pEnd; ++pCol) {
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2623
/*
 * Fill the tag variant of the function contexts with the tag values of `pTable`.
 *
 * - A query whose only output is TSDB_FUNC_TS_COMP: the tag column id is taken from
 *   the first function argument and stored into the context of column 0.
 * - Otherwise: every output expression flagged as a tag column gets its tag value.
 *   In addition, when the first expression is TSDB_FUNC_TS or TSDB_FUNC_PRJ on the
 *   primary timestamp column and a ts buffer exists, the join tag named by the first
 *   function argument is stored into the context of column 0 and logged.
 *
 * NOTE(review): the result of doGetTagColumnInfoById() is dereferenced without a NULL
 * check; it is NULL when the column id is not in tagColList.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo *pExpr = &pQuery->pExpr1[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag, pExpr->type,
                           pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;
  if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
      pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    assert(pFuncMsg->numOfParams == 1);

    int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    tVariant *pJoinTag = &pRuntimeEnv->pCtx[0].tag;
    doSetTagValueInParam(tsdb, pTable, tagColId, pJoinTag, pColInfo->type, pColInfo->bytes);

    int16_t tagType = pJoinTag->nType;
    if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
             pFirstExpr->base.arg->argValue.i64, pJoinTag->pz);
    } else {
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
             pFirstExpr->base.arg->argValue.i64, pJoinTag->i64Key);
    }
  }
}

H
Haojun Liao 已提交
2673
/*
 * Merge one per-table result row into the output of the current group.
 *
 * mergeFlag == false starts a new output row: every context output pointer is moved
 * forward by its outputBytes, the stage is set to FIRST_STAGE_MERGE and the function
 * is re-initialized. mergeFlag == true merges into the output row already in use.
 *
 * All contexts are prepared first (input buffer, timestamp, tag values); only then
 * are the distMergeFunc callbacks invoked, skipping TSDB_FUNC_TAG_DUMMY.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SResultRow *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  // phase 1: set up every function context
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pCtxList[i];
    int32_t         functionId = pQuery->pExpr1[i].base.functionId;

    if (!mergeFlag) {
      pCtx->aOutputBuf = pCtx->aOutputBuf + pCtx->outputBytes;
      pCtx->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pCtx->resultInfo);
      aAggs[functionId].init(pCtx);
    }

    pCtx->hasNull = true;
    pCtx->nStartQueryTimestamp = timestamp;
    pCtx->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);

    // in case of tag column, the tag information should be extracted from input buffer
    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    tVariantDestroy(&pCtx->tag);

    int32_t type = pCtx->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCtx->tag, varDataVal(pCtx->aInputElemBuf), varDataLen(pCtx->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pCtx->tag, pCtx->aInputElemBuf, pCtx->inputBytes, pCtx->inputType);
    }
  }

  // phase 2: run the merge function of every output column
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtxList[i]);
    }
  }
}

2717
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2786
/*
 * Debug helper: dump the intermediate result of a super table query to stdout,
 * one line per row and one tab-separated cell per output column.
 *
 * pdata:     one page per output column; column i is read from pdata[i]->data
 * numOfRows: number of rows to print
 *
 * Only binary, timestamp, bigint, int, float and double columns are printed;
 * cells of any other type are skipped.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pExpr1[col];
      char      *cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, cell, type);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2821 2822 2823
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2824 2825 2826 2827 2828
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2829

2830 2831
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2832

2833 2834
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2835

2836 2837 2838 2839
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2840

2841 2842 2843 2844
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2845

H
hjxilinx 已提交
2846
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2847
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
2848
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId);
2849

H
Haojun Liao 已提交
2850
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2851
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2852

H
hjxilinx 已提交
2853
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2854
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
2855
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId);
2856

H
Haojun Liao 已提交
2857
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2858
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2859

2860 2861 2862
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2863

2864 2865 2866
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2867
/*
 * Merge the per-table results of the table groups, starting at pQInfo->groupIndex,
 * until one group yields at least one result or all groups have been processed.
 *
 * Marks the super table query as over when the last group has been handled and all
 * of its pages are consumed. The elapsed time is added to firstStageMergeTime.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t code = mergeIntoGroupResultImpl(pQInfo, group);
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (code > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedUs = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedUs);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedUs;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the result buffer pages into the query output
 * buffers (pQuery->sdata), at most pQuery->rec.capacity rows per call.
 *
 * When every page of the current group has been consumed, the next group is merged
 * first through mergeIntoGroupResult(). If no group is left, the super table query
 * is marked as over and nothing is copied.
 *
 * The position of the next row to deliver is kept in pQInfo->groupResInfo
 * (pageId, rowId), so a page that does not fit entirely is continued by the next
 * call from the row where this call stopped. pQuery->rec.rows must be 0 on entry and
 * holds the number of copied rows on return.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->rowId < pData->num);

    // first row of this page that has not been delivered yet
    int64_t startRow = pGroupResInfo->rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer is full before the page is exhausted: remember where to resume
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->rowId += numOfCopiedRows;
      done = true;
    } else {
      // only the rows behind startRow are left in this page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pageId += 1;
      pGroupResInfo->rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page; skip delivered rows
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

2975 2976 2977
/*
 * Number of results held by a result row.
 *
 * The output columns are scanned in order, skipping TSDB_FUNC_TS, TSDB_FUNC_TAG and
 * TSDB_FUNC_TAGPRJ, and the numOfRes of the first remaining cell with a positive
 * count is returned. Returns 0 when no such cell exists.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfCols; ++col) {
    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    switch (pQuery->pExpr1[col].base.functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
        continue;
      default:
        break;
    }

    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResultRow, col);
    assert(pCell != NULL);

    if (pCell->numOfRes > 0) {
      return pCell->numOfRes;
    }
  }

  return 0;
}

3000
/*
 * Merge the result rows of all tables of one group, ordered by timestamp, and write
 * the merged rows into result buffer pages of the group.
 *
 * pGroup: array of STableQueryInfo pointers belonging to the group
 *
 * Returns
 *   0   when no table of the group holds any result,
 *   > 0 the number of data pages that now hold the group result,
 *   -1  when flushing merged rows into the result buffer fails.
 *
 * When exactly one table holds results nothing is merged: the pages of that table are
 * exposed directly through pQInfo->groupResInfo. Allocation failure and a killed
 * query leave the function through longjmp() on pRuntimeEnv->env. The temporary
 * arrays and the loser tree are released on every exit path.
 *
 * NOTE(review): the row obtained from getNewResultRow() is not returned to the pool
 * here - confirm who owns it.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that actually produced results
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    tfree(posList);
    tfree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  SResultRow* pRow = getNewResultRow(pRuntimeEnv->pool);
  resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      tfree(pTableList);
      tfree(posList);
      tfree(pTree);
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the root of the loser tree names the source with the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SResultRow  *pWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num <= 0) {
      // empty row: just advance this source
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

            // release the merge helpers before bailing out, as every other exit path does
            tfree(pTree);
            tfree(pTableList);
            tfree(posList);
            return -1;
          }

          resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SResultRow  *pNextWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      tfree(pTree);
      tfree(pTableList);
      tfree(posList);
      return -1;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  tfree(pTableList);
  tfree(posList);
  tfree(pTree);

  return pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3169 3170
/*
 * Copy the rows currently held in pQuery->sdata[] into newly requested pages of the
 * disk-based result buffer, one page per iteration.
 *
 * The number of rows to flush is read from pQuery->sdata[0]->num. Each page obtained from
 * getNewDataBuf() for pGroupResInfo->groupId receives at most pResultBuf->numOfRowsPerPage
 * rows, and pGroupResInfo->numOfDataPages is incremented once per page written.
 *
 * @param pRuntimeEnv    runtime environment that owns the result buffer and function contexts
 * @param pGroupResInfo  supplies the group id and receives the page count
 * @return               TSDB_CODE_SUCCESS (no other value is produced by this routine)
 *
 * NOTE(review): the page returned by getNewDataBuf() is dereferenced without a NULL check -
 * confirm that the allocation cannot fail on this path.
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;  // output argument of getNewDataBuf()

  const int32_t rowsPerPage = pResultBuf->numOfRowsPerPage;
  const int32_t total = (int32_t) pQuery->sdata[0]->num;

  int32_t flushed = 0;
  while (flushed < total) {
    int32_t pending = total - flushed;
    int32_t rows = (pending <= rowsPerPage)? pending:rowsPerPage;
    assert(rows > 0);

    // request a new output page for this group
    tFilePage *pPage = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    pPage->num = rows;

    // copy the slice [flushed, flushed + rows) of every output column into the page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char* src = ((char *) pQuery->sdata[col]->data) + flushed * bytes;
      char* dst = pPage->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;
      memcpy(dst, src, (size_t)(pPage->num * bytes));
    }

    flushed += rows;
    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3207 3208 3209
/*
 * Point the supplied function contexts back at the (now empty) output column buffers.
 *
 * For every output column: the output pointer is set to one element (outputBytes) before
 * the start of pQuery->sdata[k]->data, size and startOffset are reset to 1 and 0, the result
 * cell of pRow for that column is attached, and the column buffer row counter is cleared.
 *
 * NOTE(review): the "one element before the start" position presumably relies on the merge
 * routine advancing aOutputBuf before its first write - confirm against doMerge().
 */
void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pCtx[col];

    pColCtx->aOutputBuf = pQuery->sdata[col]->data - pColCtx->outputBytes;
    pColCtx->size = 1;
    pColCtx->startOffset = 0;
    pColCtx->resultInfo = getResultCell(pRuntimeEnv, pRow, col);

    pQuery->sdata[col]->num = 0;
  }
}

3220 3221 3222 3223
/*
 * Flip the per-table query window so that the table can be scanned in the opposite direction.
 *
 * If the table produced results (lastKey moved away from win.skey) the end of the window is
 * first clamped to lastKey + step; then skey/ekey are swapped, lastKey restarts at the new
 * skey, the ts-buffer cursor order is switched and its vgroupIndex invalidated, and the
 * current window index is placed on the last window result.
 *
 * A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // pQuery->order.order is read as-is; the caller is expected to have switched it already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // when lastKey still equals win.skey there are no results and the window is left untouched
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  pTableQueryInfo->cur.vgroupIndex = -1;
  SWITCH_ORDER(pTableQueryInfo->cur.order);

  // start from the last time window result
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

3251 3252
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
3253

3254
  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
3255 3256
    bool closed = getTimeWindowResStatus(pWindowResInfo, i);
    if (!closed) {
3257 3258
      continue;
    }
3259

3260
    SResultRow *pRow = getResultRow(pWindowResInfo, i);
3261

3262
    // open/close the specified query for each group result
3263
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
H
Haojun Liao 已提交
3264
      int32_t functId = pQuery->pExpr1[j].base.functionId;
3265
      SResultRowCellInfo* pInfo = getResultCell(pRuntimeEnv, pRow, j);
3266

3267 3268
      if (((functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC) ||
          ((functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC)) {
3269
        pInfo->complete = false;
3270
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
3271
        pInfo->complete = true;
3272 3273 3274 3275 3276
      }
    }
  }
}

3277 3278
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
3279
  SQuery *pQuery = pRuntimeEnv->pQuery;
3280
  int32_t order = pQuery->order.order;
3281

3282 3283
  // group by normal columns and interval query on normal table
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
Haojun Liao 已提交
3284
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
3285
    disableFuncInReverseScanImpl(pRuntimeEnv, pWindowResInfo, order);
3286
  } else {  // for simple result of table query,
3287
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {  // todo refactor
H
Haojun Liao 已提交
3288
      int32_t functId = pQuery->pExpr1[j].base.functionId;
3289

3290
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
3291 3292 3293
      if (pCtx->resultInfo == NULL) {
        continue; // resultInfo is NULL, means no data checked in previous scan
      }
3294

3295 3296
      if (((functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC) ||
          ((functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC)) {
3297 3298 3299 3300 3301 3302
        pCtx->resultInfo->complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pCtx->resultInfo->complete = true;
      }
    }
  }
H
Haojun Liao 已提交
3303 3304 3305 3306
}

/*
 * Prepare every table of every table group for the reverse scan.
 *
 * Each STableQueryInfo gets its window flipped through updateTableQueryInfoForReverseScan(),
 * and the resulting lastKey is copied into the STableKeyInfo at the same position of
 * pQInfo->tableGroupInfo.pGroupList, which is what the tsdb query handle is built from.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pQueryInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyInfoList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pQueryInfoList);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(pQueryInfoList, k);
      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // the key info list decides the start check timestamp of the tsdb query handle,
      // so keep its lastKey in sync with the flipped table window
      STableKeyInfo *pTableKeyInfo = taosArrayGet(pKeyInfoList, k);
      pTableKeyInfo->lastKey = pCheckInfo->lastKey;

      // both lists must describe the same table at the same position
      assert(pCheckInfo->pTable == pTableKeyInfo->pTable);
    }
  }
}

3328
/* Toggle the scan order stored in every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t idx = 0; idx < numOfOutput; ++idx) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[idx].order);
  }
}

3335
/*
 * Initialise a result row in place: no page/row assigned yet (both ids are -1) and the cell
 * info array located in the memory directly following the SResultRow header.
 *
 * @return TSDB_CODE_SUCCESS
 */
int32_t initResultRow(SResultRow *pResultRow) {
  pResultRow->pageId = -1;
  pResultRow->rowId = -1;

  // the cell info area starts right after the row header (sizeof(SResultRow) bytes in)
  pResultRow->pCellInfo = (SResultRowCellInfo*)(pResultRow + 1);

  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind every function context to the start of its output column buffer, reset and attach
 * its result cell, zero the column buffer, and finally re-run the function initialisers via
 * initCtxOutputBuf().
 *
 * A result row is prepared (key 0) only when the window result set of the runtime
 * environment is empty.
 *
 * NOTE(review): when windowResInfo.size != 0, pRow stays NULL and is handed to
 * getResultCell() - verify that this path is either unreachable or tolerated by the helper.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  SResultRow* pRow = NULL;
  if (pRuntimeEnv->windowResInfo.size == 0) {
    int32_t key = 0;
    int32_t uid = 0;
    pRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&key, sizeof(key), true, uid);
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    // attach the (reset) result cell of this column
    SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pRow, col);
    RESET_RESULT_INFO(pCell);
    pCtx->resultInfo = pCell;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    int32_t fid = pQuery->pExpr1[col].base.functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pExpr1[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output positions of all function contexts by 'output' rows and reset their
 * result info.
 *
 * Only functions whose status has the outer-forward flag move aOutputBuf. For top/bottom the
 * timestamp output pointer is advanced as well (TSDB_KEYSIZE bytes per row), because those
 * functions write the timestamp column themselves. DIFF must not reach this routine.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;
    assert(fid != TSDB_FUNC_DIFF);

    // move to the next output position
    if (IS_OUTER_FORWARD(aAggs[fid].nStatus)) {
      pRuntimeEnv->pCtx[col].aOutputBuf += pRuntimeEnv->pCtx[col].outputBytes * output;
    }

    // top/bottom fill the timestamp column through ptsOutputBuf, so forward it too
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      pRuntimeEnv->pCtx[col].ptsOutputBuf = (char*)pRuntimeEnv->pCtx[col].ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pRuntimeEnv->pCtx[col].resultInfo);
  }
}

/*
 * Reset the stage of every function context to 0 and run the function's init callback for
 * each context whose result info has not been initialised yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;
    pRuntimeEnv->pCtx[col].currentStage = 0;

    SResultRowCellInfo* pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (!pCell->initialized) {
      aAggs[fid].init(&pRuntimeEnv->pCtx[col]);
    }
  }
}

3420
/*
 * Apply the pending LIMIT offset to the rows currently held in the output buffers.
 *
 *  - nothing to do when there are no rows or no offset;
 *  - when all rows fall inside the offset, they are discarded, the offset is reduced by the
 *    number of discarded rows, the contexts are reset and the "result buffer full" status is
 *    cleared;
 *  - otherwise the first 'offset' rows of every column are dropped by shifting the remaining
 *    rows to the front, the output pointers are placed after the remaining rows, and the
 *    result counters are updated.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // every buffered row is skipped
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only the leading part of the buffered rows is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;
    int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

    // source and destination overlap, hence memmove
    char *pData = (char*)pQuery->sdata[col]->data;
    memmove(pQuery->sdata[col]->data, pData + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pRuntimeEnv->pCtx[col].aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

    if (fid == TSDB_FUNC_DIFF || fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update the status bits of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status; any other status is OR-ed in after the
 * QUERY_NOT_COMPLETED bit has been cleared, because the two cannot coexist.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status == QUERY_NOT_COMPLETED) {
    pQuery->status = status;
    return;
  }

  // QUERY_NOT_COMPLETED is incompatible with every other status: drop it before merging
  CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
  pQuery->status |= status;
}

/*
 * Invoke xNextStep on every output function except TSDB_FUNC_TS, using the function contexts
 * as they are currently bound.
 *
 * @return true when at least one of the visited result cells is not complete afterwards
 */
static bool stepAllFunctionsToNextStage(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    incomplete = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t fid = pQuery->pExpr1[col].base.functionId;
    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[fid].xNextStep(&pRuntimeEnv->pCtx[col]);

    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (!pCell->complete) {
      incomplete = true;
    }
  }

  return incomplete;
}

/*
 * Move all output functions to their next step and report whether another pass over the
 * data blocks is required.
 *
 * For group-by-normal-column and interval queries every closed window result is bound to the
 * contexts (setResultOutputBuf) before its functions are stepped; otherwise the functions
 * are stepped once on the current contexts.
 *
 * @return true if any stepped function is still not complete
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    return stepAllFunctionsToNextStage(pRuntimeEnv);
  }

  bool            toContinue = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SResultRow *pResult = getResultRow(pWindowResInfo, i);
    if (!pResult->closed) {
      continue;
    }

    setResultOutputBuf(pRuntimeEnv, pResult);

    // always step the functions; only the flag is accumulated
    if (stepAllFunctionsToNextStage(pRuntimeEnv)) {
      toContinue = true;
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3516
/*
 * Take a snapshot of the state needed to restore the query after repeat/reverse scans:
 * the query status, the current window index, 'start' as last key, the query window, and the
 * current table's window with its start key replaced by 'start'.
 *
 * 'start' must not lie beyond the current table's lastKey in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pTableQueryInfo->lastKey);
  } else {
    assert(start >= pTableQueryInfo->lastKey);
  }

  SQueryStatusInfo info = {0};
  info.status      = pQuery->status;
  info.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  info.lastKey     = start;

  TIME_WINDOW_COPY(info.w, pQuery->window);
  TIME_WINDOW_COPY(info.curWindow, pTableQueryInfo->win);

  // the current window starts at the requested key
  info.curWindow.skey = start;

  return info;
}

3537 3538 3539 3540
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * In order: the ts-buffer cursor is saved into pStatus (and, when a ts buffer exists, its
 * order is flipped and it is advanced once); the query window becomes the swapped
 * pStatus->curWindow; the query order is flipped; the scan flag is set to REVERSE_SCAN; the
 * query status, context order, function enable flags and per-table ranges are updated; and a
 * new secondary tsdb query handle is opened for the flipped window, replacing any old one.
 *
 * Does not return if the handle cannot be created: control is transferred with
 * longjmp(pRuntimeEnv->env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // remember the cursor so that it can be restored after the reverse scan
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // the reverse scan covers the window scanned so far, traversed backwards
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // the flipped window has to agree with the flipped order
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  pRuntimeEnv->scanFlag = REVERSE_SCAN;

  STsdbQueryCond cond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // release the handle of a previous secondary scan before opening a new one
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3586 3587
/*
 * Undo setEnvBeforeReverseScan(): flip the query and context order back, restore the saved
 * ts-buffer cursor, return to MASTER_SCAN, and restore lastKey, status and the time windows
 * of the current table and of the query from the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  pRuntimeEnv->scanFlag = MASTER_SCAN;

  // restore the scan position and status recorded before the reverse scan
  pTableQueryInfo->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // both the table window and the query window go back to the saved query window
  pQuery->window = pTableQueryInfo->win = pStatus->w;
}

H
Haojun Liao 已提交
3608 3609 3610 3611 3612 3613 3614
/*
 * Reset the lastKey of the single table in pTableGroupInfo to the start key of the query
 * condition window. Only valid for a group info holding exactly one table (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray*        pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3615
/*
 * Scan the data blocks of the current table, repeating the scan for as long as the output
 * functions ask for another pass, and finish with a reverse scan when the query needs one.
 *
 * Flow:
 *   1. master scan of all data blocks; the reached lastKey/status are recorded;
 *   2. while needScanDataBlocksAgain() reports true: open a new secondary query handle over
 *      the window covered by the master scan and scan again with scanFlag = REPEAT_SCAN;
 *   3. if needReverseScan() holds: set up the reverse environment, scan, and restore it.
 *
 * Leaves via longjmp(pRuntimeEnv->env, ...) when the secondary handle cannot be created or
 * when the query has been killed.
 *
 * @param start  key the scan starts from; used to build the status snapshot
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the start position, needed by repeat and reverse scans
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  pRuntimeEnv->scanFlag = MASTER_SCAN;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      TSKEY reachedKey = pTableQueryInfo->lastKey;

      qstatus.status = pQuery->status;

      // the window end only moves when the scan actually found qualified data
      if (qstatus.lastKey != reachedKey) {
        qstatus.curWindow.ekey = reachedKey - step;
      }

      qstatus.lastKey = reachedKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // after a repeat scan, put back the status recorded by the master scan
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    STsdbQueryCond cond = {
        .numOfCols = pQuery->numOfCols,
        .colList   = pQuery->colList,
        .order     = pQuery->order.order,
    };

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // abort the repeat scan if the query has been killed in the meantime
    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3695
/*
 * Run the xFinalize callback of every output function.
 *
 * Plain queries finalize once on the current contexts. For group-by-normal-column and
 * interval queries each closed window result is bound to the contexts, finalized, and its
 * numOfRows is set from getNumOfResult(); for group-by-normal-column queries all windows are
 * closed first.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SResultRow *pRow = pWindowResInfo->pResult[i];
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    // number of output rows of this group; usually 1, except for top/bottom queries
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Tell whether the query has at least one output column produced by a function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ) {
      continue;
    }

    return true;
  }

  return false;
}

H
Haojun Liao 已提交
3743
/*
 * Initialise a STableQueryInfo inside the caller-provided memory 'buf'.
 *
 * The window is set to 'win', lastKey to win.skey, the table handle is stored and the
 * ts-buffer cursor group index is invalidated. The window result info is initialised only for
 * interval and group-by-normal-column queries; other queries leave it untouched.
 *
 * @return 'buf' typed as STableQueryInfo, or NULL when initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    // initial sizing of the window result set for interval/groupby queries
    const int32_t initialSize = 16;
    const int32_t initialThreshold = 100;

    if (initWindowResInfo(&pInfo->windowResInfo, initialSize, initialThreshold, TSDB_DATA_TYPE_INT) != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3768
/*
 * Release the resources held inside a STableQueryInfo: its tag variant and its window
 * result info. The structure itself is not freed here. NULL is accepted and ignored.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3780
 * @param pDataBlockInfo
3781
 */
H
Haojun Liao 已提交
3782
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3783
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3784 3785 3786
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3787 3788
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3789 3790 3791 3792

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3793

H
Haojun Liao 已提交
3794 3795 3796
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3797

H
Haojun Liao 已提交
3798
  uint64_t uid = 0; // uid is always set to be 0
H
Haojun Liao 已提交
3799
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
3800
      sizeof(groupIndex), true, uid);
H
Haojun Liao 已提交
3801
  if (pResultRow == NULL) {
3802 3803
    return;
  }
3804

3805 3806 3807 3808
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
3809 3810
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3811 3812 3813 3814
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3815

H
Haojun Liao 已提交
3816 3817
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
3818
  setResultOutputBuf(pRuntimeEnv, pResultRow);
3819 3820 3821
  initCtxOutputBuf(pRuntimeEnv);
}

3822
/*
 * Point every function context at the storage of the given result row: the output position
 * inside the row's result page and the row's result cell for that column. For
 * top/bottom/diff the timestamp output pointer is set to the output buffer of the first
 * column.
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // page holding the output data of this result row
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pExpr1[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // intermediate/result information of this column for the row
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);
  }
}

H
Haojun Liao 已提交
3845
/*
 * Bind the function contexts to a window result row and initialise the functions that have
 * not been initialised for this row yet.
 *
 * The result cell is always attached. A column whose cell is both initialised and complete
 * is left otherwise untouched; for the remaining columns the output position is set, the
 * stage is reset to 0, the timestamp output pointer is set for top/bottom/diff, and the init
 * callback is run when the cell is not initialised.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // page holding the output data of this result row
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
    pCtx->currentStage = 0;

    // the function id is taken from the context itself here
    int32_t fid = pCtx->functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    if (!pCtx->resultInfo->initialized) {
      aAggs[fid].init(pCtx);
    }
  }
}

3873
/*
 * Set the tag values of the given table on the function contexts and, when a ts buffer is in
 * use, position the ts buffer for that table.
 *
 * With a ts buffer:
 *  - if the table has no cursor yet (cur.vgroupIndex == -1), the tag of the first context is
 *    copied into the table info, the start element for (vgId, tag) is looked up, and the
 *    resulting cursor is stored in the table info;
 *  - otherwise the cursor stored in the table info is re-applied to the ts buffer.
 *
 * @return 0 in all cases
 *
 * NOTE(review): the lookup-failure path does 'return false' in a function returning int32_t,
 * which is the same value (0) as the success path, so callers cannot detect the failure.
 * Kept unchanged because callers outside this excerpt may rely on it - confirm and consider
 * returning a real error code.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // without a ts buffer there is no ts comp position to maintain
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  // both the master and supplement scan need the correct ts comp start position
  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  } else {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // no data for the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return false;
    }

    // keep the cursor info of the current table
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // the same position message is logged on both paths
  if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3929
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3930 3931
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3932
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3933

3934 3935 3936
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3937
    pTableQueryInfo->win.skey = key;
3938
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3939

3940 3941 3942 3943 3944
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3945

3946 3947 3948 3949 3950 3951
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3952
    STimeWindow     w = TSWINDOW_INITIALIZER;
3953
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3954

H
Haojun Liao 已提交
3955 3956
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3957
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3958
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3959

3960 3961
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3962
        assert(win.ekey == pQuery->window.ekey);
3963
      }
3964

3965
      pWindowResInfo->prevSKey = w.skey;
3966
    }
3967

3968
    pTableQueryInfo->queryRangeSet = 1;
3969
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3970 3971 3972 3973
  }
}

/*
 * Returns true if at least one output expression uses a function whose aAggs[] status carries the
 * TSDB_FUNCSTATE_NEED_TS flag.
 */
bool requireTimestamp(SQuery *pQuery) {
  bool    needTs = false;
  int32_t idx = 0;

  // stop at the first function that asks for the timestamp
  while (!needTs && idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    needTs = ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
    idx += 1;
  }

  return needTs;
}

/*
 * Decide whether the primary timestamp column of a data block is required.
 *
 * It is required when
 * 1. the last key of the current table or the end key of the query window falls inside the time range of the
 *    block, or
 * 2. one of the output functions needs the timestamp (see requireTimestamp()).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;

  TSKEY lastKey = pQuery->current->lastKey;
  if (lastKey >= pBlockWin->skey && lastKey <= pBlockWin->ekey) {
    return true;
  }

  TSKEY endKey = pQuery->window.ekey;
  if (endKey >= pBlockWin->skey && endKey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3997
/*
 * Copy rows of the closed window results into the output columns (pQuery->sdata) until either all closed
 * windows have been visited or the output buffer (pQuery->rec.capacity rows) is full.
 *
 * The resume position is kept in pQInfo->groupIndex (windows that are done) and pQInfo->groupResInfo.rowId
 * (rows already taken from the current window), both of which are updated here.
 *
 * orderType: TSDB_ORDER_ASC visits the windows from first to last, any other value visits them in reverse.
 * Returns the number of rows copied by this call.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t      numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SResultRow **rows = pResultInfo->pResult;

  int32_t first = 0;
  int32_t delta = -1;
  if (orderType == TSDB_ORDER_ASC) {
    first = pQInfo->groupIndex;
    delta = 1;
  } else {  // descending order: walk backwards from the last window that has not been consumed
    first = numOfClosed - pQInfo->groupIndex - 1;
    delta = -1;
  }

  SGroupResInfo *pGroupRes = &pQInfo->groupResInfo;
  int32_t        copied = 0;

  for (int32_t i = first; (i >= 0) && (i < numOfClosed); i += delta) {
    // empty window: mark it as consumed and move on
    if (rows[i]->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupRes->rowId = 0;
      continue;
    }

    int32_t srcRow = pGroupRes->rowId;
    int32_t numOfRowsToCopy = rows[i]->numOfRows - pGroupRes->rowId;

    if (numOfRowsToCopy > pQuery->rec.capacity - copied) {
      // not enough room for the whole window: copy what fits and remember where to continue
      numOfRowsToCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupRes->rowId += numOfRowsToCopy;
    } else {
      // the window is drained completely
      pGroupRes->rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *pPage = getResBufPage(pEnv->pResultBuf, rows[i]->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pEnv, col, rows[i], pPage);
      char *dst = pQuery->sdata[col]->data + copied * width;
      memcpy(dst, src + srcRow * width, width * numOfRowsToCopy);
    }

    copied += numOfRowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4074
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4075
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4076

4077
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4078
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4079

4080
  pQuery->rec.rows += numOfResult;
4081

4082
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4083 4084
}

H
Haojun Liao 已提交
4085
/*
 * For non-interval queries, raise numOfRows of every window result of the runtime environment to the
 * largest numOfRes found among its result cells. Cells that belong to TSDB_FUNC_TS, TSDB_FUNC_TAG and
 * TSDB_FUNC_TAGPRJ are not taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t row = 0; row < pResInfo->size; ++row) {
    SResultRow *pRow = pResInfo->pResult[row];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool ignored = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
      if (!ignored) {
        SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pRow, col);
        pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pCell->numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4108
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4109
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4110
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4111
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4112

4113
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4114
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4115

H
Haojun Liao 已提交
4116
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4117
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4118
  } else {
4119
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4120 4121 4122
  }
}

H
Haojun Liao 已提交
4123
/*
 * Tell whether a (normal) table query still has results that were not returned to the client.
 *
 * - Never, once the limit of the query has been reached.
 * - Fill queries (except point interpolation): yes if the fill info still holds rows, or if the query is
 *   completed and filling up to the end of the query window produces rows.
 * - All other queries: yes if the query is completed, it is a group-by-normal-column or interval query,
 *   and window results exist.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool withFill = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!withFill) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // Results that were not returned yet have to be filled first.
  int32_t pending = taosNumOfRemainRows(pFillInfo);
  if (pending > 0) {
    return true;
  }

  /*
   * No buffered rows are left. If the query is not completed yet, the gap between two result blocks is
   * handled after the next data block is retrieved from TSDB, so nothing remains for now.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If
   * the result set is the FIRST result block, the gap between the start time of the query time window and
   * the timestamp of the first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfFilled = (int32_t)getNumOfResWithFill(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfFilled > 0;
}

H
Haojun Liao 已提交
4164 4165 4166 4167
/*
 * Number of columns in the final result: numOfExpr2 if a second expression list (pExpr2) exists,
 * numOfOutput otherwise.
 */
static int16_t getNumOfFinalResCol(SQuery* pQuery) {
  if (pQuery->pExpr2 != NULL) {
    return pQuery->numOfExpr2;
  }

  return pQuery->numOfOutput;
}

4168
/*
 * Serialize the query result into the response buffer "data".
 *
 * Layout written by this function:
 *   1. the result columns, one after another, numOfRows values each (columns are described by pExpr2 if it
 *      exists, by pExpr1 otherwise);
 *   2. the number of entries of pQInfo->arrTableIdInfo (int32, network byte order);
 *   3. one STableIdInfo per entry, uid/tid/key converted with htobe64/htonl/htobe64.
 *
 * Afterwards a completed query is flagged as QUERY_OVER if nothing is left to return.
 * The caller has to provide a buffer that is large enough; no bounds are checked here.
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // pick the expression list that describes the final output columns
  SExprInfo *pExprs = pQuery->pExpr1;
  int32_t    numOfCols = pQuery->numOfOutput;
  if (pQuery->pExpr2 != NULL) {
    pExprs = pQuery->pExpr2;
    numOfCols = pQuery->numOfExpr2;
  }

  for (int32_t c = 0; c < numOfCols; ++c) {
    int32_t width = pExprs[c].bytes;

    memmove(data, pQuery->sdata[c]->data, width * numOfRows);
    data += width * numOfRows;
  }

  // append the table id list
  int32_t numOfIds = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)data = htonl(numOfIds);
  data += sizeof(int32_t);

  for (int32_t k = 0; k < numOfIds; ++k) {
    STableIdInfo* pFrom = taosArrayGet(pQInfo->arrTableIdInfo, k);
    STableIdInfo* pTo = (STableIdInfo*)data;

    pTo->uid = htobe64(pFrom->uid);
    pTo->tid = htonl(pFrom->tid);
    pTo->key = htobe64(pFrom->key);

    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  if (pQInfo->runtimeEnv.stableQuery) {
    if (IS_STASBLE_QUERY_OVER(pQInfo)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
    return;
  }

  if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

H
Haojun Liao 已提交
4213
/*
 * Generate filled result rows into pQuery->sdata and apply the pending OFFSET of the query to them.
 *
 * Each round produces up to pQuery->rec.capacity rows:
 *   - offset == 0:            the rows are returned as they are;
 *   - offset <  produced rows: the first "offset" rows are dropped by shifting the remaining rows of every
 *                             column in pDst to the front, offset becomes 0;
 *   - otherwise:              the whole round is discarded, offset is reduced, and another round is started
 *                             as long as queryHasRemainResForTableQuery() reports remaining results.
 *
 * Returns the number of rows that are valid in the output; numOfFilled is not used.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t filled = (int32_t)taosFillResultDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, filled);
      return filled;
    }

    if (pQuery->limit.offset >= filled) {
      // the whole batch is swallowed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, filled, pQuery->limit.offset, 0,
          pQuery->limit.offset - filled);

      pQuery->limit.offset -= filled;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    // only the leading part of the batch is skipped
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFillInfo->numOfRows, filled, pQuery->limit.offset, filled - pQuery->limit.offset, 0);

    filled -= (int32_t)pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      memmove(pDst[col]->data, pDst[col]->data + pQuery->pExpr1[col].bytes * pQuery->limit.offset,
              filled * pQuery->pExpr1[col].bytes);
    }

    pQuery->limit.offset = 0;
    return filled;
  }
}

4257
/*
 * Finalize the cost summary of a query (hash table memory, result row pool usage, elapsed time including
 * the first stage merge) and write it to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pCost = &pEnv->summary;

  // memory held by the result row hash table and by the table query info map
  uint64_t memOfHash = taosHashGetMemSize(pEnv->pResultRowHashTable);
  memOfHash += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pCost->hashSize = memOfHash;

  // add the merge time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  SResultRowPool* pPool = pEnv->pool;
  pCost->winInfoSize = getResultRowPoolMemSize(pPool);
  pCost->numOfTimeWindows = getNumOfAllocatedResultRows(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pCost->winInfoSize/1024.0,
      pCost->numOfTimeWindows, pCost->tableInfoSize/1024.0, pCost->hashSize/1024.0);
}

4281 4282
/*
 * Consume the remaining OFFSET of the query inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped: lastKey moves one step
 * past the block boundary in scan direction. Otherwise the scan position is placed on the first row after
 * the skipped ones, lastKey takes the timestamp of that row and the rest of the block is processed.
 * In both cases pQuery->limit.offset is 0 afterwards.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the current block is skipped completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pCurrent->lastKey = edge + step;
    pQuery->limit.offset = 0;
    return;
  }

  int32_t numOfSkipped = (int32_t)pQuery->limit.offset;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = numOfSkipped;
  } else {
    pQuery->pos = pBlockInfo->rows - numOfSkipped - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // column 0 of the block is read as the timestamp column
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // continue from the first row that is not skipped
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4316

4317 4318 4319 4320 4321
/*
 * Skip the rows demanded by the OFFSET of the query, whole data blocks at a time where possible.
 *
 * Nothing is done if there is no offset or if the query has filter columns. Blocks with fewer rows than the
 * remaining offset are skipped without loading their data; the block in which the offset ends is handed to
 * updateOffsetVal(). A killed query or a TSDB error (terrno) leaves through longjmp on pRuntimeEnv->env.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo* pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo block = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &block);

    // the offset ends inside this block: find the appropriate start position in it
    if (pQuery->limit.offset <= block.rows) {
      updateOffsetVal(pRuntimeEnv, &block);
      break;
    }

    // the whole block is covered by the offset
    pQuery->limit.offset -= block.rows;

    TSKEY edge = (QUERY_IS_ASC_QUERY(pQuery)) ? block.window.ekey : block.window.skey;
    pCurrent->lastKey = edge + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), block.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4355

H
Haojun Liao 已提交
4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405
/*
 * Finish skipping time windows once pQuery->limit.offset has reached 0 (asserted on entry).
 *
 * The time window that follows *win is computed with getNextTimeWindow():
 *   - If, judged by its keys, that window can still start (ascending) or end (descending) within the time
 *     range of the current data block, the block is loaded, the start position of the next qualified window
 *     is located with getNextQualifiedWindow(), the query start timestamp is reset to the timestamp at that
 *     position and the block is processed right away through tableApplyFunctionsOnBlock(). The window index
 *     (windowResInfo.curIndex) is saved before and restored after that call.
 *   - Otherwise no data is loaded and the start key of the following window becomes the new start.
 *
 * pQuery->window.skey and windowResInfo.prevSKey are updated in both cases.
 *
 * Returns the timestamp the query starts from: the row timestamp in the first case, the start key of the
 * following window in the second one.
 */
static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  assert(pQuery->limit.offset == 0);
  STimeWindow tw = *win;
  getNextTimeWindow(pQuery, &tw);

  if ((tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {

    // load the data block and check data remaining in current data block
    // TODO optimize performance
    SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
    SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

    // restart from the original window; getNextQualifiedWindow() receives &tw and may move it
    tw = *win;
    int32_t startPos =
        getNextQualifiedWindow(pRuntimeEnv, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
    assert(startPos >= 0);

    // set the abort info
    pQuery->pos = startPos;

    // reset the query start timestamp
    pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
    pQuery->window.skey = pTableQueryInfo->win.skey;
    TSKEY key = pTableQueryInfo->win.skey;

    pWindowResInfo->prevSKey = tw.skey;
    int32_t index = pRuntimeEnv->windowResInfo.curIndex;

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
    pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
           GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
           pQuery->current->lastKey);

    return key;
  }

  // the following window lies completely outside of the current block: nothing to load
  pQuery->window.skey = tw.skey;
  pWindowResInfo->prevSKey = tw.skey;

  return tw.skey;
}

H
Haojun Liao 已提交
4406
/*
 * Skip pQuery->limit.offset time windows of an interval query before the actual processing starts.
 *
 * *start is preset with the last key of the current table and overwritten with the result of
 * doSkipIntervalProcess() once the offset is used up. Nothing is skipped if there is no offset, or if the
 * query has filter columns, a TS buffer or fill info.
 *
 * For interval queries without interpolation the windows are forwarded one at a time; because of holes in
 * the data, interval * offset cannot be used to jump directly.
 *
 * Always returns true. A TSDB error (terrno) after the block iteration leaves through longjmp on
 * pRuntimeEnv->env.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  bool nothingToSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0) ||
                       (pRuntimeEnv->pTSBuf != NULL) || (pRuntimeEnv->pFillInfo != NULL);
  if (nothingToSkip) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo block = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &block);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, block.window.ekey, pQuery->window.ekey, block.window.ekey, &aligned);

      pResInfo->startTime = pQuery->window.skey;
      pResInfo->prevSKey = aligned.skey;
    } else if (pResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, block.window.skey, block.window.skey, pQuery->window.ekey, &aligned);

      pResInfo->startTime = aligned.skey;
      pResInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow cur = getActiveTimeWindow(pResInfo, pResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      STimeWindow next = cur;

      // count the current window as skipped if its end key is covered by the block in scan direction
      bool endsInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (cur.ekey <= block.window.ekey)
                                                    : (cur.ekey >= block.window.skey);
      if (endsInBlock) {
        pQuery->limit.offset -= 1;
        pResInfo->prevSKey = cur.skey;
      }

      // current window does not ended in current data block, try next data block
      getNextTimeWindow(pQuery, &next);
      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &cur, &block, pCurrent);
        return true;
      }

      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (next.skey <= block.window.ekey)
                                                    : (next.ekey >= block.window.skey);
      if (!nextInBlock) {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still starts from the current data block: load the primary timestamp column
       * first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first
       * actually required time window resides in current data block.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      bool endsOutside = QUERY_IS_ASC_QUERY(pQuery) ? (cur.ekey > block.window.ekey)
                                                    : (cur.ekey < block.window.skey);
      if (endsOutside) {
        pQuery->limit.offset -= 1;
      }

      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &cur, &block, pCurrent);
        return true;
      }

      next = cur;
      int32_t pos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &block, pTsCol->pData, binarySearchForKey, -1);
      assert(pos >= 0);

      // set the abort info
      pQuery->pos = pos;
      pCurrent->lastKey = ((TSKEY *)pTsCol->pData)[pos];
      pResInfo->prevSKey = next.skey;
      cur = next;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4509 4510
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4511
/*
 * Create the TSDB query handle (pRuntimeEnv->pQueryHandle) that matches the kind of query.
 *
 * No handle is created here for tag-only queries and for super table queries that are neither interval
 * queries nor fixed-output queries; TSDB_CODE_SUCCESS is returned for them immediately.
 *
 * Otherwise the handle is obtained from tsdbQueryLastRow(), tsdbQueryRowsInExternalWindow() or
 * tsdbQueryTables(). For first/last-row queries the query window and the window/last key of every table
 * query info are replaced by the time window of the query condition after the handle has been created.
 *
 * Returns terrno, which is reset to TSDB_CODE_SUCCESS right before the handle is created.
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order   = pQuery->order.order,
    .colList = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // a plain ascending scan of one single table uses the window of that table instead of the query window
  bool singleTableScan = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pEnv));

  if (singleTableScan) {
    SArray* pGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pOnlyTable = taosArrayGetP(pGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    } else {
      pEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    }

    return terrno;
  }

  pEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the updated window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pTables);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTables, k);

      pTableInfo->win = pQuery->window;
      pTableInfo->lastKey = pTableInfo->win.skey;
    }
  }

  return terrno;
}

H
Haojun Liao 已提交
4575
/**
 * Build the per-column description array used by the fill (interpolation) stage.
 *
 * One SFillColInfo entry is produced for every final result column. The source
 * expression list is pExpr2 when it is present, otherwise pExpr1. Column offsets
 * are the running sum of the byte widths of the preceding columns.
 *
 * @param pQuery  query descriptor supplying the expressions and fill values
 * @return        a calloc-ed array (caller takes ownership), or NULL when the
 *                allocation fails
 */
static SFillColInfo* createFillColInfo(SQuery* pQuery) {
  int32_t total = getNumOfFinalResCol(pQuery);

  SFillColInfo* pCols = calloc(total, sizeof(SFillColInfo));
  if (pCols == NULL) {
    return NULL;
  }

  // TODO refactor
  // the second-stage expression list takes precedence when it exists
  SExprInfo* pExprs = (pQuery->pExpr2 != NULL) ? pQuery->pExpr2 : pQuery->pExpr1;

  int32_t rowOffset = 0;
  for (int32_t k = 0; k < total; ++k) {
    SExprInfo*    pExpr = &pExprs[k];
    SFillColInfo* pCol  = &pCols[k];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->tagIndex   = -2;
    pCol->flag       = TSDB_COL_NORMAL;  // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[k];

    rowOffset += pExpr->bytes;
  }

  return pCols;
}

4602
/**
 * Prepare the runtime environment of a query before it is executed.
 *
 * Steps, in order:
 *   1. cache query traits (top/bottom, tag output) and apply the scan limitation;
 *   2. open the tsdb query handle through setupQueryHandle();
 *   3. record tsdb/vgId and the ts buffer in the runtime environment;
 *   4. create the disk based result buffer and the window result info that match
 *      the query kind (super table / group by normal column / interval / plain table);
 *   5. set up the runtime environment via setupQueryRuntimeEnv();
 *   6. create the fill info when a fill type is requested for a non point-interpolation query.
 *
 * @param pQInfo         query info object owning the runtime environment
 * @param pTsBuf         optional timestamp buffer, may be NULL
 * @param tsdb           storage engine handle the query runs against
 * @param vgId           vnode group id, stored in pQInfo
 * @param isSTableQuery  true when the query targets a super table
 * @return TSDB_CODE_SUCCESS on success, otherwise the error code of the step that failed.
 *         Resources created before the failing step are not released here.
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its stored order equals the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;  // 2 MiB, handed to createDiskbasedResultBuffer()

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery) || (!isSTableQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, 1024, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = createFillColInfo(pQuery);
    if (pColInfo == NULL) {
      // createFillColInfo() only fails on allocation failure; do not hand NULL to the fill module
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    // the fill start key is aligned using the normalized (ascending) query range
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    int32_t numOfCols = getNumOfFinalResCol(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, numOfCols,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo, pQInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4708
/**
 * Clear the "complete" flag of every output column's result cell so that the
 * functions can be executed again on the next table.
 *
 * @param pRuntimeEnv  runtime environment holding the function contexts
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pCell == NULL) {
      continue;  // no result cell attached to this context
    }

    pCell->complete = false;
  }
}

H
Haojun Liao 已提交
4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735
/**
 * Prepare the execution context for one data block of one table.
 *
 * Interval queries set the interval query range from the block start key and,
 * when tag results or a ts buffer are involved, refresh the additional table
 * info. All other queries set the execution context of the table's group, using
 * the block end key advanced by one step in scan direction.
 *
 * @param pQInfo           query info object
 * @param pTableQueryInfo  per-table query state of the table owning the block
 * @param pBlockInfo       description of the data block about to be processed
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    TSKEY blockStart = pBlockInfo->window.skey;
    setIntervalQueryRange(pQInfo, blockStart);

    if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  // non-interval query
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + direction);
}

H
Haojun Liao 已提交
4736
/**
 * Iterate over all data blocks delivered by the active query handle (the primary
 * handle during the master scan, the secondary handle otherwise) and apply the
 * query functions to each of them.
 *
 * The scan stops early when a block belongs to a table that is not in the
 * table-query-info map or when loading a block fails. The function does not
 * return normally when the query is killed or when terrno is set after the
 * scan: in both cases it longjmp-s to pRuntimeEnv->env.
 *
 * @param pQInfo  query info object
 * @return        elapsed time of the scan in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;
  SQueryCostInfo   *pCost = &pEnv->summary;

  int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT handle = IS_MASTER_SCAN(pEnv)? pEnv->pQueryHandle : pEnv->pSecQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // locate the per-table query state by table id
    STableQueryInfo **ppInfo = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppInfo == NULL) {
      break;
    }

    STableQueryInfo *pInfo = *ppInfo;
    pQuery->current = pInfo;

    // the table window must be ordered in scan direction and lie inside the query window
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert(
          (pInfo->win.skey <= pInfo->win.ekey) &&
          (pInfo->lastKey >= pInfo->win.skey) &&
          (pInfo->win.skey >= pQuery->window.skey && pInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert(
          (pInfo->win.skey >= pInfo->win.ekey) &&
          (pInfo->lastKey <= pInfo->win.skey) &&
          (pInfo->win.skey <= pQuery->window.skey && pInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pInfo, &blockInfo);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t loadCode = loadDataBlockOnDemand(pEnv, &pQuery->current->windowResInfo, handle, &blockInfo, &pStatis, &pDataBlock, &blockStatus);
    if (loadCode != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // the block is skipped: move lastKey one step past the block edge selected by the scan order
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pEnv->env, terrno);
  }

  updateWindowResNumOfRes(pEnv);

  return taosGetTimestampMs() - startMs;
}

4811 4812
/**
 * Prepare the runtime environment for scanning a single table of the first
 * table group: (re)open the query handle restricted to that table and, when a
 * ts buffer is in use, position the ts buffer cursor for the table's tag.
 *
 * Does not return normally when the query handle cannot be created: it
 * longjmp-s to pRuntimeEnv->env with terrno.
 *
 * @param pQInfo  query info object
 * @param index   index of the table inside table group 0
 * @return        false when the ts buffer holds no data for the table's tag,
 *                true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(pFirstGroup, index);

  if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
    setTagVal(pEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId* id = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  // resume from the last visited key of this table
  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list that contains only the current table
  SArray *pGroupList = taosArrayInit(1, POINTER_BYTES);
  SArray *pTableList = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo keyInfo = {.pTable = pTableInfo->pTable, .lastKey = pTableInfo->lastKey};
  taosArrayPush(pTableList, &keyInfo);
  taosArrayPush(pGroupList, &pTableList);

  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = pGroupList};

  // drop the handle of the previous table, if any
  if (pEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pQueryHandle);
    pEnv->pQueryHandle = NULL;
  }

  pEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable, pQInfo, &pQInfo->memRef);
  taosArrayDestroy(pTableList);
  taosArrayDestroy(pGroupList);
  if (pEnv->pQueryHandle == NULL) {
    longjmp(pEnv->env, terrno);
  }

  if (pEnv->pTSBuf != NULL) {
    tVariant* pTag = &pEnv->pCtx[0].tag;

    /*
     * Seek to the start position of the tag when no cursor has been saved yet,
     * or when the element under the ts buffer cursor carries a different tag.
     * Otherwise continue from the saved cursor.
     */
    bool seekStartPos = (pEnv->cur.vgroupIndex == -1);
    if (!seekStartPos) {
      STSElem curElem = tsBufGetElem(pEnv->pTSBuf);
      seekStartPos = (tVariantCompare(curElem.tag, &pEnv->pCtx[0].tag) != 0);
    }

    if (seekStartPos) {
      STSElem startElem = tsBufGetElemStartPos(pEnv->pTSBuf, pQInfo->vgId, pTag);
      bool    strTag = (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR);

      // failed to find data with the specified tag value and vnodeId
      if (!tsBufIsValidElem(&startElem)) {
        if (strTag) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

        return false;
      }

      STSCursor cur = tsBufGetCursor(pEnv->pTSBuf);
      if (strTag) {
        qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
               cur.blockIndex, cur.tsIndex);
      } else {
        qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
               cur.blockIndex, cur.tsIndex);
      }
    } else {
      tsBufSetCursor(pEnv->pTSBuf, &pEnv->cur);

      STSCursor cur = tsBufGetCursor(pEnv->pTSBuf);
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
      } else {
        qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
      }
    }
  }

  initCtxOutputBuf(pEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4928
static void sequentialTableProcess(SQInfo *pQInfo) {
4929
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4930
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4931
  setQueryStatus(pQuery, QUERY_COMPLETED);
4932

H
Haojun Liao 已提交
4933
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4934

4935
  if (isPointInterpoQuery(pQuery)) {
4936 4937
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4938

4939
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4940
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4941

S
TD-1057  
Shengliang Guan 已提交
4942
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4943
             numOfGroups, group);
H
Haojun Liao 已提交
4944 4945 4946 4947 4948 4949 4950

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4951 4952
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4953 4954 4955
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4956

H
Haojun Liao 已提交
4957 4958 4959 4960 4961 4962 4963
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4964

H
Haojun Liao 已提交
4965
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
4966 4967 4968 4969 4970 4971

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4972

H
Haojun Liao 已提交
4973
      initCtxOutputBuf(pRuntimeEnv);
4974

4975
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4976
      assert(taosArrayGetSize(s) >= 1);
4977

4978
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
4979
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4980

H
Haojun Liao 已提交
4981
      // here we simply set the first table as current table
4982 4983 4984
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4985
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4986

H
Haojun Liao 已提交
4987 4988 4989 4990 4991
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4992

H
Haojun Liao 已提交
4993 4994 4995 4996 4997
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4998 4999 5000 5001 5002 5003

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
5004
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
5005
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
5006
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
5007

S
TD-1057  
Shengliang Guan 已提交
5008
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
5009 5010 5011 5012 5013 5014 5015

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
5016 5017
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5030
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
5031
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
5032

B
Bomin Zhang 已提交
5033 5034
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5035 5036 5037
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5038

5039
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5040 5041
      assert(taosArrayGetSize(s) >= 1);

5042
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5043 5044 5045 5046 5047 5048 5049 5050

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
5051
      taosArrayDestroy(s);
5052 5053 5054 5055 5056
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5057
        pWindowResInfo->pResult[i]->closed = true; // enable return all results for group by normal columns
5058

H
Haojun Liao 已提交
5059
        SResultRow *pResult = pWindowResInfo->pResult[i];
5060
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5061 5062
          SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pResult, j);
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
5063 5064 5065
        }
      }

5066
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5067 5068 5069 5070 5071 5072 5073
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5074
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5075 5076 5077 5078 5079 5080

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5081 5082 5083
    }
  } else {
    /*
5084
     * 1. super table projection query, 2. ts-comp query
5085 5086 5087
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5088
    if (pQInfo->groupIndex > 0) {
5089
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5090
      pQuery->rec.total += pQuery->rec.rows;
5091

5092
      if (pQuery->rec.rows > 0) {
5093 5094 5095
        return;
      }
    }
5096

5097
    // all data have returned already
5098
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5099 5100
      return;
    }
5101

5102 5103
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5104

H
Haojun Liao 已提交
5105
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5106 5107
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5108

5109
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5110
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5111
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5112
      }
5113

5114
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5115
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5116
        pQInfo->tableIndex++;
5117 5118
        continue;
      }
5119

H
hjxilinx 已提交
5120
      // TODO handle the limit offset problem
5121
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5122 5123
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5124 5125 5126
          continue;
        }
      }
5127

5128
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5129
      skipResults(pRuntimeEnv);
5130

5131
      // the limitation of output result is reached, set the query completed
5132
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5133
        SET_STABLE_QUERY_OVER(pQInfo);
5134 5135
        break;
      }
5136

5137 5138
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5139

5140
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5141 5142 5143 5144 5145 5146
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5147
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5148

H
Haojun Liao 已提交
5149
        STableIdInfo tidInfo = {0};
5150

H
Haojun Liao 已提交
5151 5152 5153
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5154
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5155 5156
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5157
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5158
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5159 5160
          break;
        }
5161

H
Haojun Liao 已提交
5162 5163 5164 5165
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5166
      } else {
5167
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5168 5169
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5170 5171
          continue;
        } else {
5172 5173 5174
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5175 5176 5177
        }
      }
    }
H
Haojun Liao 已提交
5178

5179
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5180 5181
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5182
  }
5183

5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5196
    finalizeQueryResult(pRuntimeEnv);
5197
  }
5198

5199 5200 5201
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5202

5203
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5204 5205
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5206
      pQuery->limit.offset);
5207 5208
}

5209 5210 5211 5212
/**
 * Switch the runtime environment from the master scan to the reverse scan:
 * flag the reverse scan, swap the query window bounds, flip the scan order
 * (and the ts buffer cursor order when a ts buffer is present), then open the
 * secondary query handle on the reversed window.
 *
 * Does not return normally when the secondary handle cannot be created: it
 * longjmp-s to pRuntimeEnv->env with terrno.
 *
 * @param pQInfo  query info object
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // the condition is built after the swap, so it describes the reversed scan
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    // do not keep a dangling handle around while the helpers below run
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5246 5247 5248 5249
/**
 * Switch the runtime environment back to the master scan after the reverse
 * scan: swap the query window bounds again, flip the ts buffer cursor order
 * (when a ts buffer is present), switch the context order and set the master
 * scan flag.
 *
 * @param pQInfo  query info object
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  STimeWindow      *pWindow = &pEnv->pQuery->window;

  SWAP(pWindow->skey, pWindow->ekey, TSKEY);

  STSBuf *pTsBuf = pEnv->pTSBuf;
  if (pTsBuf != NULL) {
    SWITCH_ORDER(pTsBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5260 5261 5262
/**
 * Close all time windows once a scan has finished.
 *
 * For interval queries the window result info of every table in every table
 * group is closed; for all other queries only the window result info of the
 * runtime environment (the group results) is closed.
 *
 * @param pQInfo  query info object
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);

    size_t tableCount = taosArrayGetSize(pTables);
    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo* pTableInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query that covers several tables and whose results are combined afterwards.
 *
 * If pQInfo->groupIndex > 0, no scan is performed; only the next part of the results
 * is copied into the output buffer. Otherwise the steps are:
 *   1. scan the data blocks of all tables (scanMultiTableDataBlocks),
 *   2. close all time window results,
 *   3. run a second scan between doSaveContext()/doRestoreContext() if needReverseScan()
 *      asks for it,
 *   4. mark the query as QUERY_COMPLETED and merge/copy the results to the output.
 *
 * Whenever pQInfo->code is not TSDB_CODE_SUCCESS or the query is killed after a scan,
 * the function leaves through longjmp(env, TSDB_CODE_TSC_QUERY_CANCELLED).
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ   = pEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    // groupIndex > 0: skip the scan, only copy the remaining data into the output buffer
    if (!QUERY_IS_INTERVAL_QUERY(pQ)) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQ);
#ifdef _DEBUG_VIEW
      displayInterResult(pQ->sdata, pEnv, pQ->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQ->rec.rows, pQ->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQ->window.skey, pQ->window.ekey, pQ->order.order);

  // first pass over all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // stop right here if the scan failed or the query has been killed
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQ)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQ, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    // TODO finalizeQueryResult may cause SIGSEGV since the memory may not be allocated yet,
    // add a cleanup function instead
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQ) || isSumAvgRateQuery(pQ)) {
    // the results are copied only if the merge of the group results succeeded
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQ);

#ifdef _DEBUG_VIEW
      displayInterResult(pQ->sdata, pEnv, pQ->sdata[0]->num);
#endif
    }
  } else {
    // neither an interval query nor a sum/avg rate query
    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  }

  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQ->rec.rows, pQ->rec.total + pQ->rec.rows);
}

H
Haojun Liao 已提交
5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369

static char *getArithemicInputSrc(void *param, const char *name, int32_t colId) {
  SArithmeticSupport *pSupport = (SArithmeticSupport *) param;
  SExprInfo* pExprInfo = (SExprInfo*) pSupport->exprList;

  int32_t index = -1;
  for (int32_t i = 0; i < pSupport->numOfCols; ++i) {
    if (colId == pExprInfo[i].base.resColId) {
      index = i;
      break;
    }
  }

  assert(index >= 0 && index < pSupport->numOfCols);
  return pSupport->data[index] + pSupport->offset * pExprInfo[index].bytes;
}

/*
 * Compute the second-stage output columns (pQuery->pExpr2) from the first-stage results
 * held in pQuery->sdata, and store them back into pQuery->sdata.
 *
 * Every second-stage expression is one of:
 *   - an arithmetic expression (TSDB_FUNC_ARITHM), evaluated by tExprTreeCalcTraverse()
 *     with getArithemicInputSrc() supplying the first-stage columns, or
 *   - a reference to a first-stage column, identified by (functionId, colId), whose data
 *     are copied unchanged.
 *
 * The results are assembled in temporary pages first, because the expressions read from
 * the same pQuery->sdata pages that receive the output at the end.
 *
 * Nothing is done if there is no second-stage expression. If a temporary buffer cannot
 * be allocated, the failure is logged and pQuery->sdata is left unmodified.
 * NOTE(review): this function has no way to report the failure to its callers — confirm
 * that logging and skipping the second stage is the wanted policy.
 */
static void doSecondaryArithmeticProcess(SQuery* pQuery) {
  if (pQuery->numOfExpr2 == 0) {
    return;
  }

  SArithmeticSupport arithSup = {0};

  tFilePage **data = calloc(pQuery->numOfExpr2, POINTER_BYTES);
  if (data == NULL) {
    qError("query:%p failed to allocate the buffer list of secondary arithmetic process", pQuery);
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    int32_t bytes = pQuery->pExpr2[i].bytes;

    // zero-filled: a column for which no source is found below must not expose heap garbage
    data[i] = (tFilePage *)calloc(1, (size_t)(bytes * pQuery->rec.rows) + sizeof(tFilePage));
    if (data[i] == NULL) {
      qError("query:%p failed to allocate the result buffer of secondary arithmetic process", pQuery);
      goto _cleanup;
    }
  }

  arithSup.offset    = 0;
  arithSup.numOfCols = (int32_t)pQuery->numOfOutput;
  arithSup.exprList  = pQuery->pExpr1;
  arithSup.data      = calloc(arithSup.numOfCols, POINTER_BYTES);
  if (arithSup.data == NULL && arithSup.numOfCols > 0) {
    qError("query:%p failed to allocate the input list of secondary arithmetic process", pQuery);
    goto _cleanup;
  }

  // the first-stage results are the input columns of the arithmetic expressions
  for (int32_t k = 0; k < arithSup.numOfCols; ++k) {
    arithSup.data[k] = pQuery->sdata[k]->data;
  }

  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    SExprInfo   *pExpr    = &pQuery->pExpr2[i];
    SSqlFuncMsg *pSqlFunc = &pExpr->base;

    if (pSqlFunc->functionId != TSDB_FUNC_ARITHM) {
      // plain column: copy the data of the first first-stage expression with the same
      // function id and column id
      // NOTE(review): the copy length uses pExpr1[j].bytes while data[i] is sized with
      // pExpr2[i].bytes; this relies on both being equal — confirm
      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pSqlFunc->functionId == pQuery->pExpr1[j].base.functionId &&
            pSqlFunc->colInfo.colId == pQuery->pExpr1[j].base.colInfo.colId) {
          memcpy(data[i]->data, pQuery->sdata[j]->data, pQuery->pExpr1[j].bytes * pQuery->rec.rows);
          break;
        }
      }
    } else {
      // calculate the result from several other columns
      arithSup.pArithExpr = pExpr;
      tExprTreeCalcTraverse(arithSup.pArithExpr->pExpr, (int32_t)pQuery->rec.rows, data[i]->data, &arithSup, TSDB_ORDER_ASC,
                            getArithemicInputSrc);
    }
  }

  // every expression has been evaluated, the output pages can be overwritten now
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    memcpy(pQuery->sdata[i]->data, data[i]->data, pQuery->pExpr2[i].bytes * pQuery->rec.rows);
  }

_cleanup:
  // entries that were never allocated are still NULL thanks to calloc()
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    tfree(data[i]);
  }

  tfree(data);
  tfree(arithSup.data);
}

5423 5424 5425 5426 5427 5428
/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5429
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5430
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5431

H
hjxilinx 已提交
5432
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5433
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5434 5435
    return;
  }
5436

H
hjxilinx 已提交
5437
  pQuery->current = pTableInfo;  // set current query table info
5438

5439
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5440
  finalizeQueryResult(pRuntimeEnv);
5441

H
Haojun Liao 已提交
5442 5443 5444 5445
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
  doSecondaryArithmeticProcess(pQuery);

H
Haojun Liao 已提交
5446
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5447
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5448
  }
5449

5450
  skipResults(pRuntimeEnv);
5451
  limitResults(pRuntimeEnv);
5452 5453
}

H
hjxilinx 已提交
5454
/*
 * Process a query on one table whose output is produced piece by piece.
 *
 * The table is scanned repeatedly until at least one row is available or the query is
 * completed; rows covered by the offset are dropped on the way. When the query ends up
 * completed, the (uid, tid, lastKey) of the table is appended to pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ   = pEnv->pQuery;

  pQ->current = pTableInfo;

  // the output buffer of a ts_comp query must not be re-initialized
  if (!isTSCompQuery(pQ)) {
    resetCtxOutputBuf(pEnv);
  }

  // blocks are skipped without loading their data when there is no filter condition
  skipBlocks(pEnv);
  if (pQ->numOfFilterCols == 0 && pQ->limit.offset > 0) {
    setQueryStatus(pQ, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQ->current->lastKey);
    finalizeQueryResult(pEnv);

    pQ->rec.rows = getNumOfResult(pEnv);

    bool offsetPending = (pQ->limit.offset > 0 && pQ->numOfFilterCols > 0);
    if (offsetPending && pQ->rec.rows > 0) {
      skipResults(pEnv);
    }

    /*
     * 1. no rows left and the query is not completed: the data need to be checked again
     * 2. rows left: they can be returned
     */
    if (pQ->rec.rows > 0 || Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQ->limit.offset, pQ->current->lastKey, pQ->current->win.ekey);

    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQ->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQ->current->lastKey, pQ->window.ekey);
  } else if (Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED)) {
    // remember the last key reached on this table
    STableId    *pId = TSDB_TABLEID(pQ->current->pTable);
    STableIdInfo tidInfo;

    tidInfo.uid = pId->uid;
    tidInfo.tid = pId->tid;
    tidInfo.key = pQ->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQ)) {
    assert(pQ->rec.rows <= pQ->rec.capacity);
  }
}

H
Haojun Liao 已提交
5514
/*
 * Scan one table from the key `start` over and over until the query status is
 * QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * After each scan, if an offset is pending, no fill is requested and there are filter
 * columns or a timestamp buffer, the closed time windows covered by the offset are
 * removed and the offset is reduced accordingly.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQ = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQ->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // todo handle offset, in case of top/bottom interval query
    bool filtered = (pQ->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (filtered && pQ->limit.offset > 0 && pQ->fillType == TSDB_FILL_NONE) {
      // drop no more windows than there are closed ones
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numOfSkipped = (int32_t)(MIN(numOfClosed, pQ->limit.offset));

      clearFirstNTimeWindow(pRuntimeEnv, numOfSkipped);
      pQ->limit.offset -= numOfSkipped;
    }
  } while (!Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5541
// handle time interval query on table
H
hjxilinx 已提交
5542
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5543 5544
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5545 5546
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5547

H
Haojun Liao 已提交
5548
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5549
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5550

5551
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5552 5553 5554 5555 5556 5557
  if (!pRuntimeEnv->groupbyNormalCol) {
    skipTimeInterval(pRuntimeEnv, &newStartKey);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      return;
    }
5558 5559
  }

5560
  while (1) {
H
Haojun Liao 已提交
5561
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5562

H
Haojun Liao 已提交
5563
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5564
      pQInfo->groupIndex = 0;  // always start from 0
5565
      pQuery->rec.rows = 0;
5566
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5567

5568
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5569
    }
5570

H
Haojun Liao 已提交
5571
    // no result generated, abort
H
Haojun Liao 已提交
5572
    if (pQuery->rec.rows == 0 || pRuntimeEnv->groupbyNormalCol) {
H
Haojun Liao 已提交
5573 5574 5575 5576 5577
      break;
    }

    doSecondaryArithmeticProcess(pQuery);
    
5578
    // the offset is handled at prepare stage if no interpolation involved
H
Haojun Liao 已提交
5579
    if (pQuery->fillType == TSDB_FILL_NONE) {
5580
      limitResults(pRuntimeEnv);
5581 5582
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5583
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
H
Haojun Liao 已提交
5584
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (const tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5585
      numOfFilled = 0;
5586

H
Haojun Liao 已提交
5587
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5588
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5589
        limitResults(pRuntimeEnv);
5590 5591
        break;
      }
5592

5593
      // no result generated yet, continue retrieve data
5594
      pQuery->rec.rows = 0;
5595 5596
    }
  }
5597

5598
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5599
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
H
Haojun Liao 已提交
5600 5601
    // maxOutput <= 0, means current query does not generate any results
    int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
5602

H
Haojun Liao 已提交
5603 5604
    if ((pQuery->limit.offset > 0 && pQuery->limit.offset < numOfClosed) || pQuery->limit.offset == 0) {
      // skip offset result rows
H
Haojun Liao 已提交
5605
      clearFirstNTimeWindow(pRuntimeEnv, (int32_t) pQuery->limit.offset);
H
Haojun Liao 已提交
5606 5607 5608 5609 5610 5611 5612 5613 5614

      pQuery->rec.rows   = 0;
      pQInfo->groupIndex = 0;
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      doSecondaryArithmeticProcess(pQuery);
      limitResults(pRuntimeEnv);
    }
5615 5616 5617
  }
}

5618 5619 5620 5621
/*
 * Entry point of a query on a single table.
 *
 * Results left over from a previous call are returned first: produced by the fill step
 * when a fill type is set, taken from the window results otherwise. If nothing is
 * returned that way, the one table of the query is processed by the interval, the fixed
 * output or the multi output processor, and the elapsed time is added to the summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ   = pEnv->pQuery;

  if (queryHasRemainResForTableQuery(pEnv)) {
    if (pQ->fillType != TSDB_FILL_NONE) {
      /*
       * Some results have not been returned yet because of the interpolation, so they
       * are generated here instead of launching the retrieve procedure for next results.
       */
      int32_t numOfFilled = 0;
      pQ->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQ->sdata, &numOfFilled);

      if (pQ->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQ->rec.rows, pQ->rec.total);
      return;
    }

    pQ->rec.rows = 0;
    pQInfo->groupIndex = 0;  // the copy always starts from group 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQ->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQ->rec.rows, pQ->rec.total);

        // no window result remains
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQ->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQ->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTable = taosArrayGetP(pGroup, 0);

  if (QUERY_IS_INTERVAL_QUERY(pQ) || pEnv->groupbyNormalCol) {
    // interval (down sampling), sliding window and group-by-normal-column queries
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {
    // diff/add/multiply/subtract/division
    assert(pQ->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable);
  }

  // account for the time spent in this call
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5683
/*
 * Entry point of a query on several tables.
 *
 * Interval queries, and fixed output queries that are neither point interpolation
 * queries nor group-by-normal-column queries, go through multiTableQueryProcess();
 * everything else is handled by sequentialTableProcess(). The elapsed time of the call
 * is added to the summary of the runtime environment.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ   = pEnv->pQuery;

  pQ->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool multiTable = QUERY_IS_INTERVAL_QUERY(pQ) ||
      (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQ)) && (!pEnv->groupbyNormalCol));

  if (!multiTable) {
    assert((pQ->checkBuffer == 1 && pQ->interval.interval == 0) || isPointInterpoQuery(pQ) ||
            isFirstLastRowQuery(pQ) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // account for the time spent in this call
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5704
/*
 * Find the position of the column referenced by an expression.
 *
 * @param pQueryMsg  query message providing colList/numOfCols and numOfTags
 * @param pExprMsg   expression whose colInfo is looked up
 * @param pTagCols   tag columns, searched when the expression refers to a tag
 * @return  - TSDB_TBNAME_COLUMN_INDEX for a tag column whose id is TSDB_TBNAME_COLUMN_INDEX
 *          - the index in pTagCols for any other tag column
 *          - TSDB_UD_COLUMN_INDEX for a user specified column
 *          - the index in pQueryMsg->colList otherwise
 *          A column that is not found trips assert(0); -1 is returned if assertions are
 *          compiled out.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t t = 0; t < pQueryMsg->numOfTags; ++t) {
      if (pTagCols[t].colId == pExprMsg->colInfo.colId) {
        return t;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {
    // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t c = 0; c < pQueryMsg->numOfCols; ++c) {
      if (pQueryMsg->colList[c].colId == pExprMsg->colInfo.colId) {
        return c;
      }
    }
  }

  // the referenced column does not exist in the source
  assert(0);
  return -1;
}

5735 5736 5737
/*
 * Check the column referenced by an expression: the index returned by
 * getColumnIndexInSource() has to be below the number of columns or below the number
 * of tags of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5740
/*
 * Sanity check of the scalar fields of a query message.
 *
 * @return false, after logging the offending value, if one of the following holds:
 *         interval < 0, numOfTables <= 0, numOfGroupCols < 0, or numOfOutput outside
 *         of (0, TSDB_MAX_COLUMNS]; true otherwise.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    // every check passed
    return true;
  }

  return false;
}

/*
 * Check the number of source columns (normal columns + tags) of a query message.
 *
 * @param pQueryMsg  query message, numOfCols/numOfTags/numOfOutput already converted
 * @param pExprMsg   the numOfOutput output expressions of the query
 * @return false if a counter is negative or their sum exceeds TSDB_MAX_COLUMNS (the
 *         values are logged), or if there is no source column at all and some output
 *         expression is none of: TSDB_FUNC_TAGPRJ, TSDB_FUNC_TID_TAG on the table name,
 *         TSDB_FUNC_COUNT on the table name. true otherwise.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  // Both counters come from the received message; the sum is computed in 64 bits so that
  // a huge value cannot overflow the addition and slip through the upper bound check.
  int64_t numOfTotal = (int64_t)pQueryMsg->numOfCols + (int64_t)pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal == 0) {
    // without any source column, only expressions on tags or on the table name are allowed
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      SSqlFuncMsg* pFuncMsg = pExprMsg[i];

      if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
          (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
          (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
        continue;
      }

      return false;
    }
  }

  return true;
}

5786
/*
 * Read the table id list from the message buffer.
 *
 * pMsg is taken as an array of pQueryMsg->numOfTables STableIdInfo entries. The byte
 * order of tid/uid/key of every entry is converted in place, and a copy of the entry is
 * pushed into a newly created array that is handed back through pTableIdList.
 *
 * @return the position in the message right behind the last entry
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  int32_t numOfTables = pQueryMsg->numOfTables;
  *pTableIdList = taosArrayInit(numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;

  for (int32_t k = 0; k < numOfTables; ++k, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5804

5805
/**
H
hjxilinx 已提交
5806
 * pQueryMsg->head has been converted before this function is called.
5807
 *
H
hjxilinx 已提交
5808
 * @param pQueryMsg
5809 5810 5811 5812
 * @param pTableIdList
 * @param pExpr
 * @return
 */
H
Haojun Liao 已提交
5813
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr, SSqlFuncMsg ***pSecStageExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5814
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5815 5816
  int32_t code = TSDB_CODE_SUCCESS;

5817 5818 5819 5820
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5821 5822 5823 5824 5825 5826
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5827 5828
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5829

5830 5831
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5832
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5833
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5834 5835

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5836
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5837
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5838 5839 5840
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5841
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5842
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5843
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5844
  pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput);
5845

5846
  // query msg safety check
5847
  if (!validateQueryMsg(pQueryMsg)) {
5848 5849
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5850 5851
  }

H
hjxilinx 已提交
5852 5853
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5854 5855
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5856
    pColInfo->colId = htons(pColInfo->colId);
5857
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5858 5859
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5860

H
hjxilinx 已提交
5861
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5862

H
hjxilinx 已提交
5863
    int32_t numOfFilters = pColInfo->numOfFilters;
5864
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5865
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5866 5867 5868 5869
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5870 5871 5872
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5873
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5874

5875 5876
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5877 5878 5879

      pMsg += sizeof(SColumnFilterInfo);

5880 5881
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5882

5883
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5884 5885 5886 5887 5888
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5889
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5890
        pMsg += (pColFilter->len + 1);
5891
      } else {
5892 5893
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5894 5895
      }

5896 5897
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5898 5899 5900
    }
  }

5901
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5902 5903 5904 5905 5906
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5907
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5908

5909
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5910
    (*pExpr)[i] = pExprMsg;
5911

5912
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5913
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
H
Haojun Liao 已提交
5914 5915 5916 5917
    pExprMsg->colInfo.flag  = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId    = htons(pExprMsg->functionId);
    pExprMsg->numOfParams   = htons(pExprMsg->numOfParams);
    pExprMsg->resColId      = htons(pExprMsg->resColId);
5918

5919
    pMsg += sizeof(SSqlFuncMsg);
5920 5921

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5922
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5923 5924 5925 5926
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5927
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5928 5929 5930 5931 5932
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5933 5934
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5935
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5936 5937
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5938 5939
      }
    } else {
5940
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5941
//        return TSDB_CODE_QRY_INVALID_MSG;
5942
//      }
5943 5944
    }

5945
    pExprMsg = (SSqlFuncMsg *)pMsg;
5946
  }
5947

5948
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5949
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5950
    goto _cleanup;
5951
  }
5952

H
Haojun Liao 已提交
5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995
  if (pQueryMsg->secondStageOutput) {
    pExprMsg = (SSqlFuncMsg *)pMsg;
    *pSecStageExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES);
    
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      (*pSecStageExpr)[i] = pExprMsg;

      pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
      pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
      pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
      pExprMsg->functionId = htons(pExprMsg->functionId);
      pExprMsg->numOfParams = htons(pExprMsg->numOfParams);

      pMsg += sizeof(SSqlFuncMsg);

      for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
        pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
        pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

        if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
          pExprMsg->arg[j].argValue.pz = pMsg;
          pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
        } else {
          pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
        }
      }

      int16_t functionId = pExprMsg->functionId;
      if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
        if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
          code = TSDB_CODE_QRY_INVALID_MSG;
          goto _cleanup;
        }
      } else {
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_QRY_INVALID_MSG;
//      }
      }

      pExprMsg = (SSqlFuncMsg *)pMsg;
    }
  }

H
hjxilinx 已提交
5996
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5997

H
hjxilinx 已提交
5998
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5999
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
6000 6001 6002 6003
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
6004 6005 6006

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
6007
      pMsg += sizeof((*groupbyCols)[i].colId);
6008 6009

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
6010 6011
      pMsg += sizeof((*groupbyCols)[i].colIndex);

6012
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
6013 6014 6015 6016 6017
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
6018

H
hjxilinx 已提交
6019 6020
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
6021 6022
  }

6023 6024
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
6025
    pQueryMsg->fillVal = (uint64_t)(pMsg);
6026 6027

    int64_t *v = (int64_t *)pMsg;
6028
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6029 6030
      v[i] = htobe64(v[i]);
    }
6031

6032
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
6033
  }
6034

6035 6036
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6037 6038 6039 6040 6041
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

6042 6043
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
6044

6045 6046 6047 6048
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
6049

6050
      (*tagCols)[i] = *pTagCol;
6051
      pMsg += sizeof(SColumnInfo);
6052
    }
H
hjxilinx 已提交
6053
  }
6054

6055 6056 6057
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
6058 6059 6060 6061 6062 6063

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
6064 6065 6066
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
6067

weixin_48148422's avatar
weixin_48148422 已提交
6068
  if (*pMsg != 0) {
6069
    size_t len = strlen(pMsg) + 1;
6070

6071
    *tbnameCond = malloc(len);
6072 6073 6074 6075 6076
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
6077
    strcpy(*tbnameCond, pMsg);
6078
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
6079
  }
6080

6081
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
6082 6083
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
6084
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
6085
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
6086 6087

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
6088 6089

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
6090
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
6091 6092
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
6093 6094 6095 6096
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
6097 6098

  return code;
6099 6100
}

H
Haojun Liao 已提交
6101 6102
/*
 * Rebuild the arithmetic expression tree that is serialized in the first
 * argument of the expression message and attach it to pArithExprInfo->pExpr.
 *
 * Returns TSDB_CODE_SUCCESS when a tree was produced, the code delivered to the
 * CATCH clause when exprTreeFromBinary() raised, and TSDB_CODE_QRY_APP_ERROR
 * when it returned NULL without raising.
 */
static int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary", pQueryMsg);

  tExprNode* pRoot = NULL;

  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    // hand the tree over to the expression descriptor
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  // the parser gave up without raising: report a generic application error
  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
6122
static int32_t createQueryFuncExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
6123 6124
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
6125
  int32_t code = TSDB_CODE_SUCCESS;
6126

H
Haojun Liao 已提交
6127
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
6128
  if (pExprs == NULL) {
6129
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
6130 6131 6132 6133 6134
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

H
Haojun Liao 已提交
6135
  for (int32_t i = 0; i < numOfOutput; ++i) {
6136
    pExprs[i].base = *pExprMsg[i];
6137
    pExprs[i].bytes = 0;
6138 6139 6140 6141

    int16_t type = 0;
    int16_t bytes = 0;

6142
    // parse the arithmetic expression
6143
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
6144
      code = buildArithmeticExprFromMsg(&pExprs[i], pQueryMsg);
6145

6146
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6147
        tfree(pExprs);
6148
        return code;
6149 6150
      }

6151
      type  = TSDB_DATA_TYPE_DOUBLE;
6152
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
6153
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
6154
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6155
      type = s.type;
H
Haojun Liao 已提交
6156
      bytes = s.bytes;
6157 6158
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
6159 6160
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6161 6162
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6163 6164 6165 6166 6167

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6168
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
6169
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6170

dengyihao's avatar
dengyihao 已提交
6171
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6172 6173 6174 6175
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6176
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6177

H
Haojun Liao 已提交
6178 6179 6180
        type  = s.type;
        bytes = s.bytes;
      }
6181 6182
    }

S
TD-1057  
Shengliang Guan 已提交
6183
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6184
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6185
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6186
      tfree(pExprs);
6187
      return TSDB_CODE_QRY_INVALID_MSG;
6188 6189
    }

6190
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6191
      tagLen += pExprs[i].bytes;
6192
    }
6193
    assert(isValidDataType(pExprs[i].type));
6194 6195 6196
  }

  // TODO refactor
H
Haojun Liao 已提交
6197
  for (int32_t i = 0; i < numOfOutput; ++i) {
6198 6199
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6200

6201
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6202
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6203 6204 6205 6206 6207 6208 6209 6210 6211
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6212 6213 6214
    }
  }

6215
  *pExprInfo = pExprs;
6216 6217 6218
  return TSDB_CODE_SUCCESS;
}

6219
/*
 * Create the group-by descriptor for a query.
 *
 * pQueryMsg - query message; numOfGroupCols, orderType and orderByIdx are read.
 * pColIndex - array of pQueryMsg->numOfGroupCols group-by column indexes.
 * code      - out: set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *             left untouched otherwise.
 *
 * Returns NULL when the query has no group-by columns or when an allocation
 * fails, otherwise a heap-allocated descriptor whose columnInfo array holds a
 * copy of every entry of pColIndex.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure instead
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6243
/*
 * Build pQuery->pFilterInfo from the per-column filters in pQuery->colList.
 *
 * pQInfo - only used as an identifier in log messages.
 * pQuery - query whose colList/numOfCols are read and whose numOfFilterCols
 *          and pFilterInfo are written.
 *
 * Returns TSDB_CODE_SUCCESS (also when no column carries a filter),
 * TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure, or
 * TSDB_CODE_QRY_INVALID_MSG when a filter has no valid relation, no filter
 * function exists for the column type, or both a value relation and an upper
 * relation are given.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array so that cleanup
    // code iterating over numOfFilterCols never touches a NULL pFilterInfo
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    // a single struct assignment copies the column descriptor
    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: the slot is chosen by which bounds are inclusive
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }
      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6336
/*
 * Refresh colInfo.colIndex of every first-stage expression so that it points
 * at the matching entry of pQuery->colList (normal columns) or
 * pQuery->tagColList (tags). Arithmetic expressions and user-defined constant
 * columns are left untouched.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the query pointer itself before looking at its members
  assert(pQuery != NULL && pQuery->pExpr1 != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pExpr1[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column must be present in the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is the only id allowed to be missing from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6373 6374
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6375 6376 6377
/*
 * Choose the capacity of the result buffer (in rows) and the threshold at
 * which it is considered full enough, and store both in pQuery->rec.
 *
 * Projection queries get as many rows as fit into RESULT_MSG_MIN_SIZE bytes,
 * but never fewer than RESULT_MSG_MIN_ROWS; every other query uses 4096 rows.
 * The threshold is RESULT_THRESHOLD_RATIO of the capacity.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  // in case of non-prj query, a smaller output buffer will be used.
  int32_t rows = 4096;

  if (isProjQuery(pQuery)) {
    rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    rows = (rows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : rows;
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

6394
/*
 * Allocate and populate a SQInfo (and its SQuery) from a decoded query message.
 *
 * pQueryMsg       - decoded query message.
 * pGroupbyExpr    - group-by descriptor, may be NULL.
 * pExprs          - first-stage expressions, pQueryMsg->numOfOutput entries.
 * pSecExprs       - second-stage expressions, may be NULL.
 * pTableGroupInfo - table groups to query; copied by value into the SQInfo.
 * pTagCols        - tag column descriptors.
 * stableQuery     - forwarded to changeExecuteScanOrder().
 *
 * Returns the new SQInfo, or NULL on failure. On failure the group-by
 * descriptor, tag columns, first-stage expressions and table group are
 * released here, either directly or through freeQInfo() once they have been
 * attached to the query.
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pExpr1          = pExprs;
  pQuery->pExpr2          = pSecExprs;
  pQuery->numOfExpr2      = pQueryMsg->secondStageOutput;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // colList holds SColumnInfo entries; size the array by its element type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    // TODO refactor
    int16_t bytes = 0;
    // pExpr2 has numOfExpr2 entries, so index numOfExpr2 itself is already out of range
    if (pQuery->pExpr2 == NULL || col >= pQuery->numOfExpr2) {
      bytes = pExprs[col].bytes;
    } else {
      bytes = MAX(pQuery->pExpr2[col].bytes, pExprs[col].bytes);
    }

    size_t size = (size_t)((pQuery->rec.capacity + 1) * bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  pQInfo->runtimeEnv.pool = initResultRowPool(getWindowResultSize(&pQInfo->runtimeEnv));

  // one contiguous buffer provides the storage for every STableQueryInfo created below
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  int32_t index = 0;

  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // each table starts scanning from its own last key
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // pQInfo was never created, so the caller's table group has not been copied anywhere
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // the query object does not own these yet; release them by hand
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6587
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6588 6589 6590 6591
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6592

H
hjxilinx 已提交
6593 6594 6595 6596
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6597
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6598 6599 6600
  return (sig == (uint64_t)pQInfo);
}

6601
/*
 * Finish the initialization of a freshly created SQInfo.
 *
 * pQueryMsg - query message; tsLen/tsOffset/tsNumOfBlocks/tsOrder describe an
 *             optional block of timestamp data embedded in the message.
 * tsdb      - storage handle; its configured precision is copied to the query.
 * vgId      - vnode group id, forwarded to the TS buffer and doInitQInfo().
 * pQInfo    - query handle to initialize.
 * isSTable  - forwarded to doInitQInfo().
 *
 * Returns TSDB_CODE_SUCCESS, also when the query is completed immediately
 * because the time range is empty or no table qualifies. When doInitQInfo()
 * fails, pQInfo is freed here and its error code is returned.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  // The "nothing to do" checks run before the TS buffer is created, so the
  // early returns below cannot leave a buffer behind that nobody owns.
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    // position the buffer on its first element
    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);

    UNUSED(ret);
  }

  // filter the qualified
  code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decrease during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

B
Bomin Zhang 已提交
6646
/*
 * Release an array of numOfFilters column filters. For every entry whose
 * filterstr flag is set, the buffer referenced by its pz member is freed
 * first; then the array itself is freed. Nothing happens when the array is
 * NULL or numOfFilters is zero.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    int32_t idx = 0;

    while (idx < numOfFilters) {
      if (pFilter[idx].filterstr) {
        free((void*)(pFilter[idx].pz));
      }

      idx++;
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6660 6661
/*
 * Destroy every STableQueryInfo referenced from the group list, the per-group
 * arrays, the group list itself and the lookup map, then reset the three
 * fields of pTableqinfoGroupInfo.
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pTables = taosArrayGetP(pGroups, g);
      size_t tableCount = taosArrayGetSize(pTables);

      for (int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pTables, t);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pTables);
    }
  }

  // called unconditionally, also when the group list is NULL
  taosArrayDestroy(pTableqinfoGroupInfo->pGroupList);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
Haojun Liao 已提交
6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699
/*
 * Destroy the expression tree attached to each of the numOfExpr entries and
 * free the array. Always returns NULL so that the caller can assign the result
 * back to the pointer it just released. A NULL array must come with a zero
 * count.
 */
static void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr) {
  if (pExprInfo != NULL) {
    int32_t idx = 0;

    while (idx < numOfExpr) {
      if (pExprInfo[idx].pExpr != NULL) {
        tExprNodeDestroy(pExprInfo[idx].pExpr, NULL);
      }

      idx += 1;
    }

    tfree(pExprInfo);
  } else {
    assert(numOfExpr == 0);
  }

  return NULL;
}

H
hjxilinx 已提交
6700 6701 6702 6703
/*
 * Release a query handle and everything reachable from it: the runtime
 * environment, the SQuery with its result pages, fill values, filters,
 * expressions, tag/column lists and group-by descriptor, the per-table query
 * info, the table group and the table id array.
 *
 * Handles that fail isValidQInfo() (NULL, or signature mismatch) are ignored.
 * The signature is cleared before the handle itself is freed.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        tfree(pQuery->sdata[col]);
      }
      tfree(pQuery->sdata);
    }

    tfree(pQuery->fillVal);

    // numOfFilterCols can be positive while the filter array was never
    // allocated (allocation failure during setup), so check the array first
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          tfree(pColFilter->pFilters);
        }
      }
    }

    pQuery->pExpr1 = destroyQueryFuncExpr(pQuery->pExpr1, pQuery->numOfOutput);
    pQuery->pExpr2 = destroyQueryFuncExpr(pQuery->pExpr2, pQuery->numOfExpr2);

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      tfree(pQuery->colList);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle so that a stale pointer no longer passes isValidQInfo()
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
6766
/*
 * Compute the number of payload bytes of the current result.
 *
 * For a ts-comp query that has produced rows, the result is a file whose path is kept in
 * pQuery->sdata[0]->data: both *numOfRows and the return value are set to the file size
 * reported by stat(), and 0 is returned if stat() fails. For every other case the size is
 * pQuery->rowSize * (*numOfRows).
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // ordinary result set: fixed row size multiplied by the number of rows
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  // for ts-comp query the "number of rows" reported to the caller is the file size
  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6787

H
hjxilinx 已提交
6788 6789 6790
/*
 * Copy the current query result into the response buffer `data`.
 *
 * - ts-comp query: the result lives in a temporary file (path in pQuery->sdata[0]->data).
 *   The whole file is read into `data`, then the file is closed and unlinked. If the query
 *   status already contains QUERY_COMPLETED, the status is advanced to QUERY_OVER.
 * - any other query: the rows are copied by doCopyQueryResultToMsg().
 *
 * Afterwards rec.total is advanced by rec.rows and the query is marked QUERY_OVER when the
 * limit has been reached exactly.
 *
 * I/O failures are logged; the function always returns TSDB_CODE_SUCCESS.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (isTSCompQuery(pQuery)) {
    const char *path = pQuery->sdata[0]->data;
    int32_t     fd = open(path, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the offset signed so that an lseek() failure (-1) is detectable
      int64_t fileSize = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, path, fileSize);

      if (fileSize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to seek in ts-comp file, path:%s, reason:%s", pQInfo, path, strerror(errno));
      } else {
        // read() may legitimately return fewer bytes than requested, loop until all is read
        int64_t numOfRead = 0;
        while (numOfRead < fileSize) {
          ssize_t ret = read(fd, data + numOfRead, (size_t)(fileSize - numOfRead));
          if (ret < 0) {
            if (errno == EINTR) {
              continue;
            }

            qError("QInfo:%p failed to read ts-comp file, path:%s, reason:%s", pQInfo, path, strerror(errno));
            break;
          }

          if (ret == 0) {  // file is shorter than the size reported by lseek()
            qError("QInfo:%p unexpected end of ts-comp file, path:%s, read:%"PRId64", expected:%"PRId64, pQInfo, path,
                   numOfRead, fileSize);
            break;
          }

          numOfRead += ret;
        }
      }

      close(fd);
      unlink(path);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             path, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6840 6841 6842 6843 6844 6845 6846
// Query handle manager; an instance is allocated by qOpenQueryMgmt() and released by qCleanupQueryMgmt().
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // id given to qOpenQueryMgmt(), used in log messages
  bool            closed;         // set to true by qQueryMgmtNotifyClosed(); new handles are rejected afterwards
  pthread_mutex_t lock;           // initialized in qOpenQueryMgmt(), destroyed in qCleanupQueryMgmt()
} SQueryMgmt;

6847
/*
 * Build a query handle from a query message.
 *
 * The message is decoded by convertQueryMsg(), the function/group-by expressions are created,
 * the set of tables to query is resolved through tsdb, the query buffer quota is checked, and
 * finally the handle is created (createQInfoImpl) and initialized (initQInfo).
 *
 * @param tsdb       tsdb handle, must not be NULL
 * @param vgId       vgroup id forwarded to initQInfo()
 * @param pQueryMsg  query message, must not be NULL
 * @param pQInfo     output: the created handle; set to NULL whenever a non-success code is returned
 * @return TSDB_CODE_SUCCESS or the error code of the failing step
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond        = NULL;
  char            *tbnameCond     = NULL;
  SArray          *pTableIdList   = NULL;
  SSqlFuncMsg    **pExprMsg       = NULL;
  SSqlFuncMsg    **pSecExprMsg    = NULL;
  SExprInfo       *pExprs         = NULL;
  SExprInfo       *pSecExprs      = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  bool            isSTableQuery  = false;
  STableGroupInfo tableGroupInfo = {0};

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &pSecExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  // first stage output expressions
  code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->numOfOutput, &pExprs, pExprMsg, pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  // optional second stage output expressions
  if (pSecExprMsg != NULL) {
    code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &pSecExprs, pSecExprMsg, pTagColumnInfo);
    if (code != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *pId = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, pId->uid, pId->tid);
    code = tsdbGetOneTableGroup(tsdb, pId->uid, pQueryMsg->window.skey, &tableGroupInfo);
    if (code != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      // the table list is given explicitly in the message
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    } else {
      // also note there's possibility that only one table in the super table
      STableIdInfo *pId = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, pId->uid, pId->tid);
      code = tsdbQuerySTableByTagCond(tsdb, pId->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    }

    int64_t elapsed = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, elapsed);
  } else {
    assert(0);
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, pSecExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);

  // these pointers were handed to createQInfoImpl(); clear them so the cleanup below leaves them alone
  pExprs         = NULL;
  pSecExprs      = NULL;
  pGroupbyExpr   = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);
  free(pExprs);
  free(pSecExprs);

  free(pExprMsg);
  free(pSecExprMsg);

  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* pCol = pQueryMsg->colList + i;
    freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
  }

  // pQInfo is already freed in initQInfo on failure, but *pQInfo may not point to NULL
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6995
/*
 * Release a query handle: print the query cost summary and free the handle.
 * Nothing is done when isValidQInfo() rejects the handle.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

7006 7007 7008 7009 7010 7011 7012 7013
/*
 * Called by the executing thread when it stops working on pQInfo.
 *
 * Under pQInfo->lock: marks the result as ready, remembers whether a response context has been
 * attached to the handle, and clears the owner. After unlocking, the `ready` semaphore is posted.
 *
 * @return true if pQInfo->rspContext was not NULL at the time of the check
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // clear qhandle owner, it must be in the secure area. other thread may run ahead before current, after it is
  // put into task to be executed.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

7025
/*
 * Execute (or continue executing) the query attached to the handle.
 *
 * The calling thread first takes ownership of the handle with a compare-and-swap on
 * pQInfo->owner; if another thread already owns it, the code is set to TSDB_CODE_QRY_IN_EXEC
 * and false is returned. Every other exit path goes through doBuildResCheck(), which
 * releases the ownership, and returns its result.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t threadId = taosGetPthreadId();
  int64_t curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId);
  if (curOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) curOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // a non-zero value here means control came back through the jump buffer:
  // record it as the error code and return to client
  int32_t ret = setjmp(pQInfo->runtimeEnv.env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  // dispatch to the matching executor
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

7081
/*
 * Check whether the result of the query is available.
 *
 * @param qinfo        query handle
 * @param buildRes     output: true if the caller should build the response now
 * @param pRspContext  response context; only used when _NON_BLOCKING_RETRIEVE is enabled,
 *                     where it is stored in the handle if the result is not ready yet
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise pQInfo->code
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  int32_t code = TSDB_CODE_SUCCESS;

#if _NON_BLOCKING_RETRIEVE
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);
  assert(pQInfo->rspContext == NULL);

  if (pQInfo->dataReady != QUERY_RESULT_READY) {
    // result not available yet: keep the context in the handle and do not build the response now
    *buildRes = false;
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  } else {
    *buildRes = true;
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  }

  code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);
#else
  // blocking mode: wait on the semaphore posted by doBuildResCheck()
  tsem_wait(&pQInfo->ready);
  *buildRes = true;
  code = pQInfo->code;
#endif

  return code;
}
7124

7125
/*
 * Allocate the retrieve response and fill it with the current result of the query.
 *
 * @param qinfo         query handle
 * @param pRsp          output: response allocated with rpcMallocCont()
 * @param contLen       output: total length of the response
 * @param continueExec  output: false when the query is killed or over, true otherwise
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_QRY_OUT_OF_MEMORY if
 *         the response cannot be allocated, otherwise pQInfo->code
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  // payload, followed by an int32_t and one STableIdInfo per entry of arrTableIdInfo
  size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *pRetrieveRsp = *pRsp;

  pRetrieveRsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    pRetrieveRsp->offset = htobe64(pQuery->limit.offset);
  } else {
    pRetrieveRsp->offset = 0;
  }
  pRetrieveRsp->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);

  pRetrieveRsp->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, pRetrieveRsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    // here current thread hold the refcount, so it is safe to free tsdbQueryHandle.
    *continueExec = false;
    pRetrieveRsp->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
7179

7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190
/*
 * Tell whether the query has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise non-zero if the query
 *         is killed or its status contains QUERY_OVER, and 0 if not
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  return IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
7191
/*
 * Mark the query as killed and block until no thread owns the handle any more.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_SUCCESS otherwise
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread being stopped.
  // Once the query is stopped, the owner of qHandle will be cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224
/*
 * Write one tag value into the result buffer.
 *
 * @param output  destination in the result buffer
 * @param val     source value, NULL means the value is null
 * @param type    data type of the value
 * @param bytes   number of bytes to copy (or to set null) for types other than BINARY/NCHAR
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarType) {
    // BINARY/NCHAR: the number of bytes to copy is given by varDataTLen(val)
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7225 7226 7227
/*
 * Build the result of a query that only touches tags / table names.
 *
 * Three kinds of output are produced, selected by the function id of the first expression:
 *  - TSDB_FUNC_TID_TAG: one var-data record per table holding
 *    {int16 0, int64 uid, int32 tid, int32 vgId, tag value or table name};
 *  - TSDB_FUNC_COUNT:   a single row with the number of tables ("count(tbname)");
 *  - otherwise:         one row per table with the requested tag values / table name,
 *                       honouring limit/offset.
 *
 * pQInfo->tableIndex is the cursor over the tables, so the function can be called repeatedly
 * to page through a large table list. On return rec.rows holds the number of rows generated
 * and the status is set to QUERY_COMPLETED. Nothing is done when there is no table group.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pExpr1[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pExpr1[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // use the type/length of the tag column definition if the column is found in the tag list
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The fields below are packed back to back, so their addresses are in general not aligned
      // for their types. Serialize them with memcpy instead of storing through a casted pointer.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // never generate more rows than the result buffer capacity or the limit of the query
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pExpr1;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7358
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7359 7360 7361 7362
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7363
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7364 7365
}

H
Haojun Liao 已提交
7366
/*
 * Reserve query buffer quota for a query on numOfTables tables.
 *
 * tsQueryBufferSize < 0 : no limitation, always succeeds.
 * tsQueryBufferSize == 0: query processing is disabled.
 * tsQueryBufferSize > 0 : the estimated size is subtracted from the quota with a
 *                         compare-and-swap loop; fails if the remaining quota would be negative.
 *
 * @return TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  if (tsQueryBufferSize == 0) {
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  for (;;) {
    int64_t available = tsQueryBufferSize;
    int64_t remain = available - required;
    if (remain < 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry if the quota was changed by someone else in the meantime
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, available, remain) == available) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

H
Haojun Liao 已提交
7389
/*
 * Add the estimated buffer size of a query on numOfTables tables back to tsQueryBufferSize.
 * Nothing is done when tsQueryBufferSize is not positive.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize > 0) {
    // restore the amount computed by getQuerySupportBufSize() for this number of tables
    int64_t released = getQuerySupportBufSize(numOfTables);
    atomic_add_fetch_64(&tsQueryBufferSize, released);
  }
}

7400 7401 7402 7403 7404 7405 7406
/* Return the response context currently stored in the query handle; qinfo must not be NULL. */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pHandle = (SQInfo*) qinfo;
  assert(pHandle != NULL);

  void* pRspContext = pHandle->rspContext;
  return pRspContext;
}

7407 7408 7409 7410 7411 7412 7413
/*
 * Free callback passed to taosCacheInit() for the query handle pool.
 * qhandle points to the stored query handle; the query is killed first and then destroyed.
 */
void freeqinfoFn(void *qhandle) {
  void** ppQInfo = qhandle;

  if (ppQInfo != NULL && *ppQInfo != NULL) {
    qKillQuery(*ppQInfo);
    qDestroyQueryInfo(*ppQInfo);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7418
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7419 7420 7421 7422

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7423
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7424 7425 7426 7427
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7428

S
TD-1530  
Shengliang Guan 已提交
7429
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7430 7431 7432 7433
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7434 7435

  qDebug("vgId:%d, open querymgmt success", vgId);
7436
  return pQueryMgmt;
7437 7438
}

H
Haojun Liao 已提交
7439
/*
 * Callback passed to taosCacheRefresh() by qQueryMgmtNotifyClosed(): kill the query whose
 * handle is stored at `handle`. A NULL `handle` or a NULL stored handle is ignored.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL || *fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7445 7446 7447 7448 7449 7450 7451
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7452
//  pthread_mutex_lock(&pQueryMgmt->lock);
7453
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7454
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7455

H
Haojun Liao 已提交
7456
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473
}

/*
 * Destroy the query manager: clean up the handle pool, destroy the lock and free the manager.
 * The manager must have been closed by qQueryMgmtNotifyClosed() before. NULL is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  assert(pQueryMgmt->closed);

  // keep the id for the log message printed after the manager is freed
  int32_t vgId = pQueryMgmt->vgId;

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  tfree(pQueryMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}

7479
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7480
  if (pMgmt == NULL) {
7481
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7482 7483 7484
    return NULL;
  }

7485
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7486

7487 7488
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7489
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7490
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7491 7492 7493
    return NULL;
  }

H
Haojun Liao 已提交
7494
//  pthread_mutex_lock(&pQueryMgmt->lock);
7495
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7496
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7497
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7498
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7499 7500
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7501 7502
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7503
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7504 7505 7506 7507 7508

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7509
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7510 7511
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7512 7513 7514 7515 7516 7517 7518
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7519 7520 7521
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7522 7523
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7524
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7525
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7526 7527 7528 7529 7530 7531
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7532
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7533 7534 7535 7536 7537
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7538
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7539 7540 7541
  return 0;
}

7542