qExecutor.c 251.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
37
// true when any bit of status mask s is set in p
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
// true when the forward-direction factor of the query order equals the ascending step
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

// test or set the scanFlag field of a runtime environment
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

// recover the enclosing SQInfo from a pointer to its runtimeEnv member
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

// position of the index-th element starting from (query)->pos and moving by step
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
// flip n between TSDB_ORDER_ASC and TSDB_ORDER_DESC in place (n is evaluated more than once)
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

// zero-valued SDataBlockInfo compound literal
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey pair of _src into _dst; both arguments are struct lvalues.
 * The expansion deliberately has no trailing semicolon: the caller supplies it, so the
 * macro acts as exactly one statement and is safe in an unbraced if/else.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
// Query status bits; each value is a distinct bit so that they can be combined and masked.
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* The query is over:
   * 1. used by queries that produce a single result row, e.g. count/sum/first/last/avg, etc.
   * 2. also set when all data within the queried time window has been processed
   */
  QUERY_COMPLETED = 0x4u,

  /* Set when the result has not been completely returned to the client; usually used
   * for interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
77 78

// Outcome codes for a timestamp join comparison.
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Fault-injection allocators, disabled by default. When enabled, the #defines at the end
 * redirect malloc/calloc/realloc in the rest of this file to wrappers that randomly
 * return NULL, so that out-of-memory paths get exercised.
 */

// fail one call out of 1000, otherwise forward to malloc
static UNUSED_FUNC void *u_malloc (size_t size) {
  uint32_t v = rand();

  if (v % 1000 == 0) {
    return NULL;
  } else {
    return malloc(size);
  }
}

// fail one call out of 1000, otherwise forward to calloc
static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t v = rand();
  if (v % 1000 == 0) {
    return NULL;
  } else {
    return calloc(num, size);
  }
}

// fail two calls out of five, otherwise forward to realloc
static UNUSED_FUNC void* u_realloc(void* p, size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return realloc(p, size);
  }
}

// the redirections come after the wrappers so the wrappers themselves call the real allocators
#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
// clear the status bits st in (q)->status
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
// number of entries in the table group list of q
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
// the _index-th table group of q, as an SArray*
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
// true when the interval length of the query is positive
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/**
 * Move a time window to the next window in the scan direction of the query.
 *
 * For every interval unit except 'n' and 'y' the start key moves by interval.sliding times
 * the direction factor, and the end key becomes start + interval - 1. For 'n' and 'y' the
 * window is shifted by whole calendar months using localtime_r()/mktime(); a 'y' interval
 * is first multiplied by 12.
 *
 * @param pQuery query descriptor; order, interval and precision are read
 * @param tw     in/out: current window on entry, next window on return (ekey is inclusive)
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // scale the start key down by 1000, and by another 1000 for microsecond precision
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  // work on a flat month counter so that year roll-over is handled by the / and % below
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before multiplying: time_t and long may be 32-bit, and the product would overflow
  tw->skey = (int64_t)mktime(&tm) * 1000;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  // ekey is inclusive: one tick before the start of the following window
  tw->ekey -= 1;
}

173 174
// mark the query as over by moving tableIndex to the number of tables
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
// true once tableIndex has reached (or passed) the number of tables
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
175

H
hjxilinx 已提交
176
// todo move to utility
177
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
178

179
static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
180 181
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
static void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow);
182
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
183

184
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
185
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
186

187
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
188
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
189 190
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
191
static void buildTagQueryResult(SQInfo *pQInfo);
192

193
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
194
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
195 196
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
197

198
/**
 * Check the element at position elemPos against every filtered column of the query.
 *
 * Within one column the filters are alternatives: the column qualifies as soon as one
 * filter accepts the element. Across columns all must qualify.
 *
 * @param pQuery  query descriptor holding numOfFilterCols entries in pFilterInfo
 * @param elemPos index of the element inside each filter column's pData buffer
 * @return true when every filtered column qualifies (also when there are no filter columns)
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      // NULL handling comes first: only the isNull/notNull filters are decided here
      bool isnull = isNull(pElem, pFilterInfo->info.type);
      if (isnull) {
        if (pFilterElem->fp == isNull_filter) {
          qualified = true;
          break;
        } else {
          continue;
        }
      } else {
        if (pFilterElem->fp == notNull_filter) {
          qualified = true;
          break;
        } else if (pFilterElem->fp == isNull_filter) {
          continue;
        }
      }

      // the same element is passed for both value arguments of the filter callback
      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/**
 * Return the largest numOfRes found among the output columns of the query.
 *
 * @param pRuntimeEnv runtime environment; pQuery and pCtx are read
 * @return the maximum number of results, 0 when no column reports any
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pExpr1[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

266 267 268 269 270
/*
 * The number of results needs to be updated because the offset value was updated.
 * Every output column except the ts/tag/tagprj/ts_dummy ones gets numOfRes overwritten
 * with the supplied value; the previous value is asserted to be larger.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

H
Haojun Liao 已提交
286
// Map a group index to a result id: 20000000 plus 10000 per group.
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  int32_t base = 20000000;
  return base + (groupIndex * 10000);
}

/**
 * Tell whether the group-by expression contains a normal column.
 *
 * @param pGroupbyExpr group-by expression, may be NULL
 * @return true when one of the group columns is flagged as a normal column; false for a
 *         NULL or empty expression
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      //make sure the normal column locates at the second position if tbname exists in group by clause
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/**
 * Look up the data type of the first normal column used in the group-by expression.
 *
 * @param pQuery       query descriptor whose colList is searched by column id
 * @param pGroupbyExpr group-by expression, must not be NULL
 * @return the column type, or TSDB_DATA_TYPE_NULL when no matching column is found
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 stays in place when the group-by expression has no normal column
  int32_t colId = -2;
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/**
 * Tell whether the query combines at least one selectivity function with a
 * TAG_DUMMY or TS_DUMMY output column.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pExpr1[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

358 359
// Return true when every output column is a PRJ or TAGPRJ function (also for zero outputs).
bool isProjQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pExpr1[i].base.functionId;
    if (functId != TSDB_FUNC_PRJ && functId != TSDB_FUNC_TAGPRJ) {
      return false;
    }
  }

  return true;
}

H
Haojun Liao 已提交
369
// Report whether the first output expression of the query is the TS_COMP function.
bool isTSCompQuery(SQuery *pQuery) {
  const int32_t firstFunctionId = pQuery->pExpr1[0].base.functionId;
  return firstFunctionId == TSDB_FUNC_TS_COMP;
}
370

371 372 373
/**
 * Apply the LIMIT of the query to the rows produced in the current round.
 *
 * When rec.total + rec.rows exceeds a positive limit, rec.rows is cut down so that the
 * total matches the limit exactly and the query status is set to QUERY_COMPLETED.
 *
 * @return true when rows were discarded, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

// Return true when any output column of the query is a TOP or BOTTOM function.
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
403
/**
 * Tell whether the query output involves tag values: either the only output is the
 * TS_COMP function, or at least one output column is flagged as a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pExprInfo = &pQuery->pExpr1[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  } else {  // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo *pLocalExprInfo = &pQuery->pExpr1[idx];

      // ts_comp column required the tag value for join filter
      if (TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        return true;
      }
    }
  }

  return false;
}

421 422 423 424 425 426 427 428
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
429
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
430
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
431 432
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
433 434
  } else {
    *pColStatis = NULL;
435
  }
436

H
Haojun Liao 已提交
437
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
438 439 440
    return false;
  }

441 442 443
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
444

445 446 447
  return true;
}

H
Haojun Liao 已提交
448
/**
 * Find the result row stored under (pData, uid) in the result-row hash table, creating
 * it during the master scan when it does not exist yet.
 *
 * On a hit, pWindowResInfo->curIndex is set to the stored slot. On a miss during the
 * master scan, the pResult array is grown when full, a row is taken from the pool and
 * initialised, and its slot index is registered in the hash table.
 *
 * Errors are reported through longjmp(pRuntimeEnv->env, ...): TSDB_CODE_QRY_OUT_OF_MEMORY
 * when growing the array or initialising the row fails, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW
 * when the number of rows exceeds MAX_INTERVAL_TIME_WINDOW.
 *
 * @param pData      key bytes
 * @param bytes      length of the key in bytes
 * @param masterscan whether new rows may be created
 * @param uid        id combined with the key bytes
 * @return the result row, or NULL when the key is unknown and masterscan is false
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);
  int32_t *p1 =
      (int32_t *)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCapacity = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // a capacity of 0 or 1 does not grow under the factors above; always gain at least
      // one slot, otherwise the store at pResult[size] below would be out of bounds
      if (newCapacity <= pWindowResInfo->capacity) {
        newCapacity = (int64_t)pWindowResInfo->capacity + 1;
      }

      // keep the old pointer until realloc is known to have succeeded
      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SResultRow **)t;

      // zero the newly added slots
      int32_t inc = (int32_t)newCapacity - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, POINTER_BYTES * inc);

      pWindowResInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = getNewResultRow(pRuntimeEnv->pool);
    pWindowResInfo->pResult[pWindowResInfo->size] = pResult;
    int32_t ret = initResultRow(pResult);
    if (ret != TSDB_CODE_SUCCESS) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes),
                (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the time window that covers timestamp ts.
 *
 * The starting candidate is the window of the current result row, or, when curIndex is -1,
 * a window that begins at prevSKey. If the candidate does not contain ts, a window that
 * does is computed: by truncation for 'n'/'y' units, by stepping in multiples of
 * interval.sliding otherwise. Window end keys are inclusive.
 *
 * @param pWindowResInfo window result info (curIndex, prevSKey and result rows are read)
 * @param ts             timestamp to be covered
 * @param pQuery         query descriptor (interval, precision, window and order are read)
 * @return the window; for ascending queries ekey is capped at pQuery->window.ekey
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // "- 1" keeps ekey inclusive, consistent with every other branch in this function
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // move the start backwards in whole sliding steps until it is not after ts
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // move the start forwards in whole sliding steps until the window reaches ts
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

H
Haojun Liao 已提交
552
/**
 * Assign a (pageId, rowId) position in the disk-based result buffer to a result row
 * that does not have one yet.
 *
 * The last page registered for tid is reused while it holds fewer than numOfRowsPerPage
 * rows; otherwise, or when tid has no page at all, a new page is requested.
 *
 * @return 0 on success or when the row already has a page, -1 when no page is available
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, tid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    if (pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, tid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->rowId = (int32_t)(pData->num++);

    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

H
Haojun Liao 已提交
597
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, SDataBlockInfo* pBockInfo,
598
                                       STimeWindow *win, bool masterscan, bool* newWind) {
599 600
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
601

H
Haojun Liao 已提交
602 603
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, pBockInfo->uid);
  if (pResultRow == NULL) {
604 605 606
    *newWind = false;

    return masterscan? -1:0;
607
  }
608

609
  *newWind = true;
H
Haojun Liao 已提交
610

611
  // not assign result buffer yet, add new result buffer
H
Haojun Liao 已提交
612 613
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, pBockInfo->tid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
614
    if (ret != TSDB_CODE_SUCCESS) {
615 616 617
      return -1;
    }
  }
618

619
  // set time window for current result
H
Haojun Liao 已提交
620 621
  pResultRow->win = (*win);
  setWindowResOutputBufInitCtx(pRuntimeEnv, pResultRow);
622 623 624
  return TSDB_CODE_SUCCESS;
}

625
// Return the closed flag of the result row stored in the given slot (slot must be in [0, size)).
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);
  return pWindowResInfo->pResult[slot]->closed;
}

H
Haojun Liao 已提交
630
/**
 * Count how many rows, starting at pos and moving in the scan direction, belong to the
 * window that ends at ekey.
 *
 * @param numOfRows number of timestamps in pData
 * @param searchFn  search routine used to locate ekey
 * @param ekey      end key of the window
 * @param pos       index of the first row to consider
 * @param order     TSDB_ORDER_ASC scans pData[pos..numOfRows-1]; any other value scans
 *                  pData[0..pos] backwards
 * @param pData     timestamp column
 * @return the number of rows; asserted to be positive
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    // the search runs on the sub-array starting at pos, so end is relative to pos
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      // the row that holds ekey itself is part of the window
      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
661
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
662
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
663
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
664
    return pWindowResInfo->size;
665
  }
666

667
  // no qualified results exist, abort check
668
  int32_t numOfClosed = 0;
669

670
  if (pWindowResInfo->size == 0) {
671
    return pWindowResInfo->size;
672
  }
673

674
  // query completed
H
hjxilinx 已提交
675 676
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
677
    closeAllTimeWindow(pWindowResInfo);
678

679 680 681 682
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
683
    int64_t skey = TSKEY_INITIAL_VAL;
684

685
    for (i = 0; i < pWindowResInfo->size; ++i) {
H
Haojun Liao 已提交
686
      SResultRow *pResult = pWindowResInfo->pResult[i];
687
      if (pResult->closed) {
688
        numOfClosed += 1;
689 690
        continue;
      }
691

692
      TSKEY ekey = pResult->win.ekey;
693
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
694
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
695 696
        closeTimeWindow(pWindowResInfo, i);
      } else {
697
        skey = pResult->win.skey;
698 699 700
        break;
      }
    }
701

702
    // all windows are closed, set the last one to be the skey
703
    if (skey == TSKEY_INITIAL_VAL) {
704 705 706 707 708
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
709

710
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex]->win.skey;
711

712 713
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
714
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
715
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
716

717
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
718
    } else {
719
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
720
             numOfClosed);
721 722
    }
  }
723

724 725 726 727 728
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
729

730
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
731
  return numOfClosed;
732 733 734
}

/**
 * Count the rows of a data block, starting at startPos and moving in the scan direction,
 * that belong to the time window ending at ekey.
 *
 * When the window ends inside the block the count comes from getForwardStepsInBlock();
 * otherwise every remaining row of the block in the scan direction is counted.
 *
 * @param pPrimaryColumn timestamp column of the block
 * @param startPos       first row to consider, must lie in [0, rows)
 * @param updateLastKey  when true, pQuery->current->lastKey is set to one step past the
 *                       last counted key (or past the block boundary)
 * @return the number of rows; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num   = -1;
  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

H
Haojun Liao 已提交
774 775
/**
 * Run the block-wise implementation of every output function over a range of rows.
 *
 * Nothing happens unless the scan is the master scan or the window is closed.
 *
 * @param closed      whether the target window is already closed
 * @param pWin        window whose skey becomes nStartQueryTimestamp of each context
 * @param offset      position of the first row in scan direction
 * @param forwardStep number of rows to process
 * @param tsCol       timestamp column, used as ptsList for selectivity functions
 * @param numOfTotal  total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // remembered from the first context; written back to every context after it has run
  bool hasPrev = pCtx[0].preAggVals.isSet;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      // startOffset is always the lowest index of the range, whatever the scan direction
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      int32_t functionId = pQuery->pExpr1[k].base.functionId;
      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      // NOTE: the original value of isSet have been changed here
      if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }

      // restore it
      pCtx[k].preAggVals.isSet = hasPrev;
    }
  }
}

808
/**
 * Run the row-wise implementation (xFunctionF) of every output function on the row at
 * offset. Nothing happens unless the scan is the master scan or the window is closed.
 *
 * @param closed whether the target window is already closed
 * @param pWin   window whose skey becomes nStartQueryTimestamp of each context
 * @param offset position of the row to process
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;

      int32_t functionId = pQuery->pExpr1[k].base.functionId;
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }
  }
}

H
Haojun Liao 已提交
824 825
/*
 * Advance *pNext to the following time window and find the row of the current data block
 * at which that window starts.
 *
 * Returns the starting row position, or -1 when the new window lies completely past the
 * block in the scan direction. When the timestamp at the starting position is outside the
 * window just computed, *pNext is pushed further along in the scan direction towards it.
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  getNextTimeWindow(pQuery, pNext);

  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the next time window does not overlap the current block
  if (ascending) {
    if (pNext->skey > pDataBlockInfo->window.ekey) {
      return -1;
    }
  } else if (pNext->ekey < pDataBlockInfo->window.skey) {
    return -1;
  }

  // first key of the window in scan order, clamped to the start of the query range
  TSKEY startKey = ascending ? pNext->skey : pNext->ekey;
  if (ascending) {
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  int32_t startPos = 0;

  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling window (sliding == interval): the window starts right after the previous one ended
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (ascending && startKey <= pDataBlockInfo->window.skey) {
    startPos = 0;
  } else if (!ascending && startKey >= pDataBlockInfo->window.ekey) {
    startPos = pDataBlockInfo->rows - 1;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  if (primaryKeys == NULL) {
    // no timestamp column available: only sanity-check that window and block overlap
    if (ascending) {
      assert(pDataBlockInfo->window.skey <= pNext->ekey);
    } else {
      assert(pDataBlockInfo->window.ekey >= pNext->skey);
    }

    return startPos;
  }

  /*
   * The window may not cover any data (this can happen when the time window is too small);
   * in that case move it on to the timestamp found at startPos.
   */
  const TSKEY next = primaryKeys[startPos];
  const bool  beyondEnd = ascending && (next > pNext->ekey);
  const bool  beforeStart = (!ascending) && (next < pNext->skey);

  if (beyondEnd || beforeStart) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // 'n'/'y' units: rebuild the window from the truncated timestamp
      pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
      pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else if (beyondEnd) {
      // shift forward by a whole number of sliding steps
      pNext->ekey += ((next - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
    } else {
      // shift backward by a whole number of sliding steps
      pNext->skey -= ((pNext->skey - next + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
    }
  }

  return startPos;
}

H
Haojun Liao 已提交
900
/*
 * Return the key at which processing of pWindow stops in scan order, limited by the end
 * key of the query range: min(window.ekey, query.ekey) for ascending queries and
 * max(window.skey, query.ekey) otherwise.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY bound = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > bound) ? bound : pWindow->ekey;
  }

  return (pWindow->skey < bound) ? bound : pWindow->skey;
}

H
hjxilinx 已提交
917 918
/*
 * Look up the data buffer of the column whose id equals colId in pDataBlock.
 * Returns NULL when no column in the block carries that id.
 * todo: replace the linear scan with a binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
932
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
933 934 935
  if (pDataBlock == NULL) {
    return NULL;
  }
936

H
Haojun Liao 已提交
937
  char *dataBlock = NULL;
H
Haojun Liao 已提交
938
  SQuery *pQuery = pRuntimeEnv->pQuery;
939

H
Haojun Liao 已提交
940
  int32_t functionId = pQuery->pExpr1[col].base.functionId;
941
  if (functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
942
    sas->pArithExpr = &pQuery->pExpr1[col];
943

944 945 946 947
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
948

H
Haojun Liao 已提交
949 950 951 952
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

953
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
954
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
955
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
956
      SColumnInfo *pColMsg = &pQuery->colList[i];
957

958 959 960 961 962 963 964 965
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
966

967
      assert(dataBlock != NULL);
968
      sas->data[i] = dataBlock;  // start from the offset
969
    }
970

971
  } else {  // other type of query function
H
Haojun Liao 已提交
972
    SColIndex *pCol = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
973
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
974
      SColIndex* pColIndex = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
975 976 977 978
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
979 980
    } else {
      dataBlock = NULL;
981 982
    }
  }
983

984 985 986 987
  return dataBlock;
}

/**
H
Haojun Liao 已提交
988
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
989 990
 * @param pRuntimeEnv
 * @param forwardStep
991
 * @param tsCols
992 993 994 995 996
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
997
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
998 999
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
1000
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1001 1002
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

1003 1004
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
1005
  if (pDataBlock != NULL) {
1006
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
1007
    tsCols = (TSKEY *)(pColInfo->pData);
1008
  }
1009

1010
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1011 1012 1013
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1014

H
Haojun Liao 已提交
1015
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1016
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1017
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1018
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1019
  }
1020

1021
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1022
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1023
    TSKEY ts = TSKEY_INITIAL_VAL;
1024

H
Haojun Liao 已提交
1025 1026 1027 1028 1029 1030 1031 1032
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1033
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1034
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
1035
      tfree(sasArray);
H
hjxilinx 已提交
1036
      return;
1037
    }
1038

H
Haojun Liao 已提交
1039 1040 1041
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1042
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1043
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1044
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1045

1046
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1047
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1048
    }
1049

1050 1051
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1052

1053
    while (1) {
H
Haojun Liao 已提交
1054 1055
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1056 1057 1058
      if (startPos < 0) {
        break;
      }
1059

1060
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1061
      hasTimeWindow = false;
H
Haojun Liao 已提交
1062
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1063 1064
        break;
      }
1065

1066 1067 1068 1069 1070
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1071
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1072

1073 1074
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1075
    }
1076

1077 1078 1079 1080 1081 1082 1083
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1084
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1085
      int32_t functionId = pQuery->pExpr1[k].base.functionId;
1086 1087 1088 1089 1090
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1091

1092
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1093
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1094 1095
      continue;
    }
1096

S
TD-1848  
Shengliang Guan 已提交
1097
    tfree(sasArray[i].data);
1098
  }
1099

S
TD-1848  
Shengliang Guan 已提交
1100
  tfree(sasArray);
1101 1102
}

1103
/*
 * Select (creating it if necessary) the result row of the group identified by the group-by
 * column value pData, and make it the current output buffer of all function contexts.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        value of the group-by column for the current row
 * @param type         data type of the group-by column
 * @param bytes        width of the group-by column in bytes
 * @param groupIndex   group index of the current table, used as the uid of the result row
 * @return TSDB_CODE_SUCCESS, or -1 when the value is null, no result row could be prepared,
 *         or no result buffer page could be added
 *
 * Jumps to pRuntimeEnv->env for float/double group-by columns (not supported) and when the
 * copy of a binary/nchar key cannot be allocated.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
  bool isVarKey = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // variable-length values are looked up by their payload, not by the length header
  char* d = pData;
  int16_t len = bytes;
  if (isVarKey) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  uint64_t uid = groupIndex;  // the group index of the table serves as the uid
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, uid);
  if (pResultRow == NULL) {
    return -1;
  }

  if (isVarKey) {
    /*
     * Copy the key only once per result row: this function runs for every row of the
     * group, and allocating a new copy each time leaked the previous buffer.
     * NOTE(review): assumes a freshly prepared result row has key == NULL -- confirm
     * against doPrepareResultRowFromKey().
     */
    if (pResultRow->key == NULL) {
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pResultRow->key, pData);
    }
  } else {
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
      default: break;  // any other type keeps -1
    }

    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  // not assign result buffer yet, add new result buffer
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1160
/*
 * Find the data of the group-by column inside pDataBlock.
 *
 * Tag columns in the group-by list are skipped. For each remaining group-by column, *type
 * and *bytes are set from the matching entry of pQuery->colList, and the data pointer of
 * the block column with the same column id is returned. Returns NULL when no group-by
 * column is present in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // locate the column in the query column list to obtain its type and width
    int32_t targetId = pGroupCol->colId;
    int16_t slot = -1;

    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pQuery->colList[c].colId != targetId) {
        continue;
      }

      slot = (int16_t)c;
      break;
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * The column index is acquired from the first table of all qualified tables in this
     * vnode during the query prepare stage; the remaining tables may not actually have the
     * required column in cache. Hence the block is searched by column id here.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` with the current element of the timestamp buffer.
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tag of the first function context differs from
 * the element's tag, TS_JOIN_TS_NOT_EQUALS when the row has not yet reached the element's
 * timestamp in scan order, and TS_JOIN_TS_EQUAL otherwise. A row that is already past the
 * element's timestamp trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // NOTE(review): elem.tag is handed to tVariantCompare() next to a pointer argument above,
  // so `elem.tag.i64Key` may not compile when _DEBUG_VIEW is defined -- confirm.
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // the row is "behind" when it comes before the buffered timestamp in scan direction
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether function `functionId` has to be invoked for the given context in the
 * current scan. The checks are applied in this order:
 *   - TSDB_FUNC_TS always runs;
 *   - a completed result, TSDB_FUNC_TAG_DUMMY and TSDB_FUNC_TS_DUMMY never run;
 *   - first/first_dst run only for ascending queries;
 *   - last/last_dst run only when param[0] equals the query order;
 *   - everything else runs except during the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pCellInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (pCellInfo->complete || isDummy) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST) || (functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST) || (functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary (reverse) scan none of the remaining functions is executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1267 1268
/*
 * Apply all output expressions to the current data block one row at a time, using the
 * per-row implementation (xFunctionF) of each function.
 *
 * Each row is first checked against the timestamp buffer (if any) and the column filters
 * (if any). For interval queries the row is then applied to every time window that contains
 * its timestamp; otherwise the group result buffer is selected when grouping by a normal
 * column and the functions are applied directly. Afterwards the last key (and the timestamp
 * buffer cursor, if any) of the current table is recorded.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  bool             masterScan = IS_MASTER_SCAN(pRuntimeEnv);

  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  // the first column supplies the timestamps only if it really is a timestamp column
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  bool groupByNormalCol = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByNormalCol) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *inputData = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], inputData, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
  }

  // set the input column data
  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[f];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t joinRes = doTSJoinFilter(pRuntimeEnv, offset);
      if (joinRes == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (joinRes == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(joinRes == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow);

      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // visit the following windows that also contain this timestamp, then restore the index
      STimeWindow nextWin = win;
      int32_t     savedIndex = pWindowResInfo->curIndex;

      for (;;) {
        getNextTimeWindow(pQuery, &nextWin);
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.ekey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByNormalCol) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, pTableInfo->groupIndex);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t funcId = pQuery->pExpr1[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], funcId)) {
          aAggs[funcId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else {
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    pTableInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // todo refactor: extract method
  // only arithmetic expressions own a data pointer array
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pExpr1[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1433
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1434
  SQuery *pQuery = pRuntimeEnv->pQuery;
1435

H
hjxilinx 已提交
1436 1437
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1438

H
Haojun Liao 已提交
1439
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1440
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1441
  } else {
1442
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1443
  }
1444

1445
  // update the lastkey of current table
1446
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1447
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1448

1449
  // interval query with limit applied
1450
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1451
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1452 1453
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1454
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1455

1456 1457 1458 1459
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1460

1461 1462 1463
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1464

1465 1466 1467
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1468 1469 1470 1471 1472

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1473
    }
1474
  }
1475

1476
  return numOfRes;
1477 1478
}

H
Haojun Liao 已提交
1479
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1480
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1481

H
Haojun Liao 已提交
1482 1483
  int32_t functionId = pQuery->pExpr1[colIndex].base.functionId;
  int32_t colId = pQuery->pExpr1[colIndex].base.colInfo.colId;
1484

1485
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1486
  pCtx->hasNull = hasNullValue(&pQuery->pExpr1[colIndex].base.colInfo, pStatis, &tpField);
1487
  pCtx->aInputElemBuf = inputData;
1488

1489
  if (tpField != NULL) {
H
Haojun Liao 已提交
1490
    pCtx->preAggVals.isSet  = true;
1491 1492
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1493 1494 1495
  } else {
    pCtx->preAggVals.isSet = false;
  }
1496

H
Haojun Liao 已提交
1497 1498
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1499 1500
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1501

H
Haojun Liao 已提交
1502
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1503 1504
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1505

1506 1507
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1508
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1509
  }
1510

1511 1512 1513 1514 1515
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1516
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1517
    /*
H
Haojun Liao 已提交
1518
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
H
Haojun Liao 已提交
1519
     * timestamp column, and the y-value is the column specified in pQuery->pExpr1[i].colIdxInBuffer
1520 1521 1522 1523 1524
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
H
Haojun Liao 已提交
1525 1526
      SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);
      STwaInfo *pTWAInfo = (STwaInfo*) GET_ROWCELL_INTERBUF(pInfo);
1527 1528 1529
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1530

1531 1532
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1533 1534 1535 1536 1537 1538
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1539
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1540 1541 1542
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1543
    pInterpInfo->type = (int8_t)pQuery->fillType;
1544 1545
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1546

1547 1548 1549 1550
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1551 1552 1553
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1554 1555
      }
    }
H
Haojun Liao 已提交
1556 1557 1558
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1559
  }
1560

1561 1562 1563 1564 1565 1566
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1567
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1568 1569 1570
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1571
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1572 1573 1574 1575 1576 1577
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1578
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1579 1580
  SQuery* pQuery = pRuntimeEnv->pQuery;

1581
  if (isSelectivityWithTagsQuery(pQuery)) {
1582
    int32_t num = 0;
1583
    int16_t tagLen = 0;
1584

1585
    SQLFunctionCtx *p = NULL;
1586
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1587 1588 1589
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1590

1591
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1592
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;
1593

1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1607 1608 1609 1610 1611
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
S
TD-1848  
Shengliang Guan 已提交
1612
      tfree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1613
    }
1614
  }
H
Haojun Liao 已提交
1615 1616

  return TSDB_CODE_SUCCESS;
1617 1618
}

1619
/*
 * Allocate and initialise the per-output function contexts of a query.
 *
 * For every output expression the matching SQLFunctionCtx receives its input
 * type/width (taken from the tag list, the user-defined-column argument or the
 * regular column list), its output type/width, the scan order, the function id
 * and a copy of the function parameters. The result-row offsets of each output
 * are accumulated on the way.
 *
 * pRuntimeEnv  runtime environment; pQuery and pool must already be set
 * order        value stored in param[2] of top/bottom/diff contexts
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY. On failure the
 * context array (including parameter variants already copied into it) and the
 * rowCellInfoOffset array are released.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));
  pRuntimeEnv->pResultRow = getNewResultRow(pRuntimeEnv->pool);

  if (pRuntimeEnv->pResultRow == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex      *pIndex = &pSqlFuncMsg->colInfo;

    // remember the "null required" request and strip the marker from the flag
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type/width according to where the column comes from
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pExpr1[i].bytes;
    pCtx->outputType = pQuery->pExpr1[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;
    pCtx->stableQuery = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pExpr1[i].interBytes;

    // copy the function parameters; string parameters are referenced through
    // argValue.pz, all others are read from the inline 64-bit slot
    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the first output to be a timestamp function
      int32_t f = pQuery->pExpr1[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offsets of output i follow directly after output i - 1
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] =
          pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pExpr1[i - 1].interBytes;
    }
  }

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameter variants copied so far before dropping the context
  // array; contexts that were never reached still have numOfParams == 0
  // because the array comes from calloc.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  tfree(pRuntimeEnv->pCtx);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739
/*
 * Release both tsdb query handles owned by the runtime environment of pQInfo
 * and reset the stored pointers. The memory reference of the query is expected
 * to be fully released already; this is asserted.
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  pEnv->pQueryHandle = NULL;
  pEnv->pSecQueryHandle = NULL;

  SMemRef* pRef = &pQInfo->memRef;
  assert(pRef->ref == 0 && pRef->imem == NULL && pRef->mem == NULL);
}

1740 1741 1742 1743
/*
 * Release everything the runtime environment owns: the time-window info, the
 * function contexts (parameter variants, tag variant, tag context list), the
 * fill info, the result buffer, the query handles, the ts buffer, the key
 * buffer, the row-cell offset array, the result-row hash table and the
 * result-row pool.
 *
 * Does nothing when the environment has no query attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      SQLFunctionCtx *ctx = &pRuntimeEnv->pCtx[k];

      for (int32_t m = 0; m < ctx->numOfParams; ++m) {
        tVariantDestroy(&ctx->param[m]);
      }

      tVariantDestroy(&ctx->tag);
      tfree(ctx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

H
Haojun Liao 已提交
1781
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1782

H
Haojun Liao 已提交
1783
// Mark the query as cancelled; IS_QUERY_KILLED() tests for exactly this code.
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1784

H
Haojun Liao 已提交
1785 1786 1787
/*
 * Tell whether the query produces a fixed number of result rows.
 *
 * Interval queries never do. Top/bottom queries and group-by-normal-column
 * queries always do. Otherwise the query is fixed-output as soon as one
 * output function (timestamp helpers aside) is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[k].base;

    // ignore the ts_comp function: a leading one-parameter projection of the
    // primary timestamp column
    bool leadingTsProjection = (k == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool tsHelper = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || tsHelper) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1817
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1818
static bool isPointInterpoQuery(SQuery *pQuery) {
1819
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1820
    int32_t functionID = pQuery->pExpr1[i].base.functionId;
1821
    if (functionID == TSDB_FUNC_INTERP) {
1822 1823 1824
      return true;
    }
  }
1825

1826 1827 1828 1829
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1830
static bool isSumAvgRateQuery(SQuery *pQuery) {
1831
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1832
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
1833 1834 1835
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1836

1837 1838 1839 1840 1841
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1842

1843 1844 1845
  return false;
}

H
hjxilinx 已提交
1846
// Returns true when at least one output expression is TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t k = 0;

  while (k < pQuery->numOfOutput) {
    if (pQuery->pExpr1[k].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }
    ++k;
  }

  return false;
}

H
hjxilinx 已提交
1857
/*
 * Decide whether a second scan in the opposite direction is required.
 *
 * That is the case when a first/first_dst function runs in a non-ascending
 * query, or when a last/last_dst function asks (through its first argument)
 * for a scan order different from the order of the query. Timestamp and tag
 * helper outputs are ignored.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t funcId = pQuery->pExpr1[k].base.functionId;

    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY || funcId == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST) || (funcId == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (funcId == TSDB_FUNC_LAST) || (funcId == TSDB_FUNC_LAST_DST);
    if (!isLast) {
      continue;
    }

    // the scan order to acquire the last result of the specified column
    int32_t requiredOrder = (int32_t)pQuery->pExpr1[k].base.arg->argValue.i64;
    if (requiredOrder != pQuery->order.order) {
      return true;
    }
  }

  return false;
}
H
hjxilinx 已提交
1879

H
Haojun Liao 已提交
1880 1881 1882 1883
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1884 1885
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1886
    SExprInfo* pExprInfo = &pQuery->pExpr1[i];
H
Haojun Liao 已提交
1887 1888

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1889 1890 1891 1892

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1893
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1894 1895 1896
      return false;
    }
  }
1897

H
hjxilinx 已提交
1898 1899 1900
  return true;
}

1901 1902
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1903
/*
 * Compute the interval-aligned time window that contains key.
 *
 * win->skey is key truncated to the interval boundary. win->ekey is the last
 * timestamp of that window: one interval later minus one, using calendar
 * arithmetic for the 'n' and 'y' interval units. key must lie in
 * [keyFirst, keyLast] and the sliding step must not exceed the interval.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * If keyFirst > INT64_MAX - interval, the range between keyFirst and keyLast
   * must be shorter than one interval, so the window is simply capped at
   * INT64_MAX instead of being computed (which would overflow).
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  char unit = pQuery->interval.intervalUnit;
  if (unit == 'n' || unit == 'y') {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column
 * queries. Otherwise it is 1 only if every output function, timestamp helpers
 * aside, is a multi-output function (and at least one such function exists).
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t funcId = pQuery->pExpr1[k].base.functionId;
    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[funcId].nStatus);
    if (!allMultiOutput) {
      break;  // a single non multi-output function settles the answer
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1947
bool colIdCheck(SQuery *pQuery) {
1948 1949
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1950
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1951
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1952 1953 1954
      return false;
    }
  }
1955

1956 1957 1958 1959 1960 1961
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
//
// Returns true when every output function, timestamp and tag helpers aside,
// is either functId or functIdDst.
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t funcId = pQuery->pExpr1[k].base.functionId;

    bool helper = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TS_DUMMY) || (funcId == TSDB_FUNC_TAG) ||
                  (funcId == TSDB_FUNC_TAG_DUMMY);
    if (helper) {
      continue;
    }

    bool wanted = (funcId == functId) || (funcId == functIdDst);
    if (!wanted) {
      return false;
    }
  }

  return true;
}

// True when the query consists of first/first_dst functions only (helpers aside).
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// True when the query consists of last/last_dst functions only (helpers aside).
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1982
// todo refactor, add iterator
1983 1984
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1985
  for(int32_t i = 0; i < t; ++i) {
1986
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1987 1988 1989

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1990
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1991

1992 1993 1994 1995
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1996 1997 1998 1999
    }
  }
}

2000
/*
 * Force the scan order required by the functions of the query and, when the
 * order flips, exchange the start/end keys of the query window.
 *
 * Rules, evaluated in this sequence:
 *  - last_row query: ascending; the window is normalised to skey <= ekey.
 *  - group by normal column with descending order: ascending, the window is
 *    normalised and the per-table lastKey values are re-seated.
 *  - point interpolation without interval: ascending.
 *  - only first/first_dst functions: ascending; only last/last_dst functions:
 *    descending. With an interval this applies to super table queries only.
 *
 * pQInfo       query whose order/window are adjusted
 * pQueryMsg    not referenced by this function
 * stableQuery  true when the query runs on a super table
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // shared log format for every branch that exchanges the query range
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      // log with pQInfo: GET_QINFO_ADDR() expects the runtime env, not the SQuery
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Initial number of result pages for the query: 128 for a group-by-normal-
 * column query, max(number of tables, 16) for an interval query, 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t pages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // time window query, allocate one page for each table
    size_t s = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(s, INITIAL_RESULT_ROWS_VALUE));
  } else {      // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(pages > 0);
  return pages;
}

2110 2111
/*
 * Derive the geometry of the intermediate result buffer.
 *
 * *rowsize  receives the row size of the query scaled by
 *           GET_ROW_PARAM_FOR_MULTIOUTPUT
 * *ps       receives the page size: DEFAULT_INTERN_BUF_PAGE_SIZE, doubled
 *           until MIN_ROWS_PER_PAGE (4) rows fit next to the tFilePage header
 *
 * pRuntimeEnv->numOfRowsPerPage is updated to the number of rows that fit in
 * one page of that size.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t MIN_ROWS_PER_PAGE = 4;
  int32_t overhead = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // grow the page until it holds the minimum number of rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * MIN_ROWS_PER_PAGE) > (*ps) - overhead; *ps = (*ps << 1u)) {
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2127
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2128

H
Haojun Liao 已提交
2129 2130 2131 2132
/*
 * Use the pre-computed block statistics to decide whether the data block has
 * to be loaded.
 *
 * Returns true when there are no statistics, when the query has neither
 * filter columns nor a top/bottom function, when a filter column has no
 * statistics or is of binary/nchar type, when an all-NULL column is checked by
 * an isNull filter, or when any filter accepts the [min, max] range of its
 * column. For a top/bottom query the decision of topbot_datablock_filter() on
 * the first top/bottom output is returned. Otherwise the block can be skipped
 * and false is returned.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && !pRuntimeEnv->topBotQuery) {
    return true;
  }

  for (int32_t c = 0; c < pQuery->numOfFilterCols; ++c) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[c];

    // locate the statistics entry of the filtered column
    int32_t slot = 0;
    while (slot < pQuery->numOfCols && pDataStatis[slot].colId != pColFilter->info.colId) {
      ++slot;
    }

    // no statistics data, load the true data block
    if (slot >= pQuery->numOfCols) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pColFilter->info.type)) {
      return true;
    }

    SDataStatis* pColStatis = &pDataStatis[slot];

    // all data in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      // if isNULL query exists, load the null data column
      for (int32_t f = 0; f < pColFilter->numOfFilters; ++f) {
        if (pColFilter->pFilters[f].fp == isNull_filter) {
          return true;
        }
      }

      continue;
    }

    // float columns keep their min/max as double in the statistics; the
    // filter functions expect float bounds for them
    float fMin = 0;
    float fMax = 0;
    char *pMin = (char *)&pColStatis->min;
    char *pMax = (char *)&pColStatis->max;

    if (pColFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      fMin = (float)(*(double *)(&pColStatis->min));
      fMax = (float)(*(double *)(&pColStatis->max));
      pMin = (char *)&fMin;
      pMax = (char *)&fMax;
    }

    for (int32_t f = 0; f < pColFilter->numOfFilters; ++f) {
      if (pColFilter->pFilters[f].fp(&pColFilter->pFilters[f], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t funcId = pQuery->pExpr1[k].base.functionId;
      if (funcId != TSDB_FUNC_TOP && funcId != TSDB_FUNC_BOTTOM) {
        continue;
      }

      return topbot_datablock_filter(&pCtx[k], funcId, (char *)&pDataStatis[k].min, (char *)&pDataStatis[k].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
2203 2204 2205 2206 2207 2208 2209 2210
/*
 * Tell whether a time-window boundary falls inside the data block, i.e. the
 * block is not covered by a single window.
 *
 * Starting from the aligned window of the first block key in scan direction,
 * the windows are advanced with getNextTimeWindow() until they leave the
 * block. true is returned as soon as a window starts or ends inside the
 * block range.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  const TSKEY blockStart = pBlockInfo->window.skey;
  const TSKEY blockEnd = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockStart, sk, ek, &w);
    assert(w.ekey >= blockStart);

    // the first window ends before the block does
    if (w.ekey < blockEnd) {
      return true;
    }

    for (;;) {
      getNextTimeWindow(pQuery, &w);
      if (w.skey > blockEnd) {
        return false;
      }

      assert(w.ekey > blockEnd);
      if (w.skey <= blockEnd && w.skey > blockStart) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, blockEnd, sk, ek, &w);
  assert(w.skey <= blockEnd);

  // the first window starts after the block does
  if (w.skey > blockStart) {
    return true;
  }

  for (;;) {
    getNextTimeWindow(pQuery, &w);
    if (w.ekey < blockStart) {
      return false;
    }

    assert(w.skey < blockStart);
    if (w.ekey < blockEnd && w.ekey >= blockStart) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2252
/*
 * Load only as much of the current data block as the query needs.
 *
 * *status reports the decision:
 *   BLK_DATA_NO_NEEDED      nothing was loaded
 *   BLK_DATA_STATIS_NEEDED  the block statistics were fetched into *pStatis;
 *                           the block itself is loaded only if they are absent
 *   BLK_DATA_ALL_NEEDED     statistics and block data were loaded
 *   BLK_DATA_DISCARD        the statistics showed that no row can pass the
 *                           filters
 *
 * The whole block is required when the query has filter columns or a ts
 * buffer, or when an interval query has a window boundary inside the block.
 * Otherwise every output function is asked through its dataReqFunc.
 *
 * Returns TSDB_CODE_SUCCESS, or terrno when the block data cannot be
 * retrieved in the BLK_DATA_ALL_NEEDED path.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  // pTSBuf is a pointer: test it against NULL rather than ordering it with 0
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every output function; stop once the
      // whole block is known to be needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pExpr1[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    // NOTE(review): the block data is still retrieved and counted after the
    // status became BLK_DATA_DISCARD - confirm whether callers rely on that
    // before turning it into an early return.
    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2337
/*
 * Binary search in an ascending array of timestamps.
 *
 * pValue  array of num TSKEY values
 * key     timestamp to look for
 * order   TSDB_ORDER_ASC:  index of key, or of the first element bigger than
 *                          key; -1 if every element is smaller
 *         TSDB_ORDER_DESC: index of key, or of the last element smaller than
 *                          key; -1 if every element is bigger
 *
 * Returns -1 as well when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;

      int32_t mid = ((hi - lo + 1) >> 1) + lo;

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) return lo;
    if (key == keys[hi]) return hi;

    if (key > keys[hi]) {
      // the answer is the element right after hi, if there is one
      hi = hi + 1;
      return (hi >= num) ? -1 : hi;
    }

    int32_t mid = ((hi - lo + 1) >> 1) + lo;

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2400 2401 2402 2403 2404 2405 2406 2407
/*
 * Reallocate every output column buffer of the query so that it can hold `capacity` rows.
 *
 * Nothing is done when `capacity` is smaller than the current pQuery->rec.capacity. Otherwise each
 * pQuery->sdata[i] is reallocated to a tFilePage header plus `capacity` rows of pExpr1[i].bytes bytes,
 * and the matching function context's output pointer is reset to the start of the page data.
 *
 * On allocation failure the function does not return: it longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY. The buffer that failed to grow is left untouched and still valid.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity can exceed INT32_MAX for wide columns and large buffers
    size_t newBytes = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newBytes);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2428 2429 2430
/*
 * Make sure the output buffers can take all rows of the data block that is about to be processed.
 *
 * Only applies when the query is not an interval query, not a group-by-normal-column query, not a
 * fixed-output query and not a ts-comp query (e.g. prj/diff queries). If the free space
 * (capacity - rows) is smaller than pBlockInfo->rows, every column buffer is grown by exactly the
 * missing number of rows, the area after the existing rows is zeroed, and the function contexts are
 * re-pointed at the first free row of the (possibly moved) buffers.
 *
 * On allocation failure the function longjmp()s to pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough free rows already
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // compute the size in size_t: bytes * newSize can exceed INT32_MAX for wide columns and large buffers
    size_t newBytes = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newBytes);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that already hold results
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // these functions write their timestamps into the output of the first column
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487
/*
 * Initialise the first time window of an interval query from the first data block that is scanned.
 *
 * Does nothing unless the query is an interval query and windowResInfo.prevSKey still holds
 * TSKEY_INITIAL_VAL. The aligned window is computed by getAlignQueryTimeWindow(); its start key
 * becomes prevSKey. startTime is the aligned start for ascending queries and the query window's
 * skey for descending ones.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo* pInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow aligned = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &aligned);
    pInfo->startTime = aligned.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &aligned);
    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = aligned.skey;
}

2488 2489
/*
 * Iterate over the data blocks delivered by the query handle of the current scan and apply the
 * query functions to each of them.
 *
 * The loop stops when the handle has no more blocks, when loading a block fails, or when the query
 * status becomes QUERY_RESBUF_FULL or QUERY_COMPLETED. A killed query or a non-zero terrno aborts via
 * longjmp() to pRuntimeEnv->env. If the result buffer is not full afterwards, the query is marked
 * QUERY_COMPLETED; for interval queries in the master/repeat scan all time windows are then closed.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  // the master scan reads through the primary handle, every other scan through the secondary one
  TsdbQueryHandleT handle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  } else {
    handle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo block = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &block);
    doSetInitialTimewindow(pRuntimeEnv, &block);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &block);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     blockStatus = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, handle, &block, &pStatis,
                                         &pDataBlock, &blockStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // skip the block entirely: continue right behind its last key in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? block.window.ekey : block.window.skey;
      pQuery->current->lastKey = edge + direction;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = block.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &block, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += block.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           block.window.skey, block.window.ekey, block.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && (IS_MASTER_SCAN(pRuntimeEnv) || pRuntimeEnv->scanFlag == REPEAT_SCAN)) {
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    } else {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2568
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2569
  tVariantDestroy(tag);
2570

2571
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2572
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2573
    assert(val != NULL);
2574

H
[td-90]  
Haojun Liao 已提交
2575
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2576
  } else {
2577
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2578 2579 2580 2581
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2582

H
hjxilinx 已提交
2583
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2584
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2585 2586 2587 2588
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2589
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2590
    } else {
H
Haojun Liao 已提交
2591 2592 2593 2594 2595
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2596
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2597
    }
2598
  }
2599 2600
}

2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612
/*
 * Linear lookup of a tag column descriptor by column id.
 *
 * Returns a pointer to the first entry of pTagColList whose colId equals `colId`, or NULL if none
 * of the `numOfTags` entries matches. The list must be non-NULL and non-empty (asserted).
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t idx = 0;
  while (idx < numOfTags) {
    SColumnInfo* pCol = &pTagColList[idx];
    if (pCol->colId == colId) {
      return pCol;
    }

    idx += 1;
  }

  return NULL;
}

2613
/*
 * Load the tag values of pTable into the function contexts of the query.
 *
 * - A query whose single output is TSDB_FUNC_TS_COMP only needs the tag column named by the first
 *   function argument; it is stored in pCtx[0].tag.
 * - Otherwise every output expression flagged as a tag column gets its tag value, and, when a ts
 *   buffer exists and the first expression is a ts/prj function on the primary timestamp column,
 *   the join tag named by its first argument is additionally stored in pCtx[0].tag and logged.
 *
 * NOTE(review): the result of doGetTagColumnInfoById() is dereferenced without a NULL check; this
 * relies on the tag column id always being present in pQuery->tagColList - confirm.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo *pExpr = &pQuery->pExpr1[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag, pExpr->type,
                           pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;

  bool tsOrPrj = (pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ);
  if (!(tsOrPrj && pRuntimeEnv->pTSBuf != NULL && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX)) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      joinTagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
  SColumnInfo *pJoinColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinTagColId);

  doSetTagValueInParam(tsdb, pTable, joinTagColId, &pRuntimeEnv->pCtx[0].tag, pJoinColInfo->type, pJoinColInfo->bytes);

  int16_t tagType = pRuntimeEnv->pCtx[0].tag.nType;
  if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pFirstExpr->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pFirstExpr->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64Key);
  }
}

H
Haojun Liao 已提交
2663
/*
 * Merge one per-table result row into the current row of the merge output buffers.
 *
 * pRuntimeEnv  runtime environment that owns the function contexts and the result buffer
 * timestamp    start timestamp that is stored in every function context
 * pWindowRes   the result row to merge in; its page is fetched from the result buffer
 * mergeFlag    true: merge into the output row the contexts already point at;
 *              false: first advance every output pointer by one row and re-initialise the function
 *
 * All contexts are prepared in a first pass; the merge functions run in a second pass, skipping
 * TSDB_FUNC_TAG_DUMMY.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SResultRow *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  // pass 1: point every context at its input cell (and at a fresh output row if requested)
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCur = &pCtxList[i];
    int32_t         functionId = pQuery->pExpr1[i].base.functionId;

    if (!mergeFlag) {
      pCur->aOutputBuf += pCur->outputBytes;
      pCur->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pCur->resultInfo);
      aAggs[functionId].init(pCur);
    }

    pCur->hasNull = true;
    pCur->nStartQueryTimestamp = timestamp;
    pCur->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, pPage);

    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pCur->tag);

    int32_t type = pCur->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCur->tag, varDataVal(pCur->aInputElemBuf), varDataLen(pCur->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pCur->tag, pCur->aInputElemBuf, pCur->inputBytes, pCur->inputType);
    }
  }

  // pass 2: run the merge function of every output column except the dummy tag columns
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtxList[i]);
    }
  }
}

2707
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2776
/*
 * Debug helper: dump the intermediate result of a super table query to stdout, one row per line.
 *
 * pdata      one page per output column; row j of column i starts at pdata[i]->data + bytes(i) * j
 * numOfRows  number of rows to print
 *
 * Only binary, timestamp, bigint, int, float and double output columns are printed; columns of any
 * other type are skipped silently.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo* pExpr = &pQuery->pExpr1[col];
      char*      cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, cell, type);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2811 2812 2813
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2814 2815 2816 2817 2818
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2819

2820 2821
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2822

2823 2824
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2825

2826 2827 2828 2829
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2830

2831 2832 2833 2834
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2835

H
hjxilinx 已提交
2836
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2837
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
2838
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId);
2839

H
Haojun Liao 已提交
2840
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2841
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2842

H
hjxilinx 已提交
2843
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2844
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
2845
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId);
2846

H
Haojun Liao 已提交
2847
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2848
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2849

2850 2851 2852
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2853

2854 2855 2856
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2857
/*
 * Merge the per-table results of the next table group(s) until one group yields at least one result
 * page or all groups have been visited.
 *
 * pQInfo->groupIndex is advanced past every group that was merged. When the last group has been
 * consumed and no page is pending, the super table query is flagged as over. The elapsed time is
 * added to summary.firstStageMergeTime.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 if merging a group failed.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (;;) {
    if (pQInfo->groupIndex >= numOfGroups) {
      break;
    }

    SArray *pTables = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pTables);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the disk-based result buffer into the query output buffers
 * (pQuery->sdata), at most pQuery->rec.capacity rows per call.
 *
 * pGroupResInfo->pageId/rowId form the read cursor: pageId is the next page of the current group and
 * rowId the first row of that page that has not been returned yet. When all pages of the current
 * group are consumed, the next group is merged first; if no group is left the super table query is
 * flagged as over and nothing is copied.
 *
 * Inside a page, column i starts at offset[i] * numOfRowsPerPage and its rows are outputBytes apart.
 *
 * On return pQuery->rec.rows holds the number of rows copied (it must be 0 on entry).
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pageId; j < (int32_t)size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->rowId < pData->num);

    // remember where to resume inside this page before the cursor is advanced below
    int32_t startRow = (int32_t)pGroupResInfo->rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer gets full: take what fits and stay on this page
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->rowId += numOfCopiedRows;
      done = true;
    } else {
      // the rest of the page fits: copy only the rows that were not returned yet
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pageId += 1;
      pGroupResInfo->rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      char *pDest = pQuery->sdata[i]->data + (size_t)offset * bytes;
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + (size_t)startRow * bytes;

      memcpy(pDest, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

2965 2966 2967
/*
 * Number of results held by a result row.
 *
 * Walks the output columns in order and returns the numOfRes of the first column that has a
 * positive count; ts, tag and tagprj columns are ignored because they do not decide the number of
 * output rows. Returns 0 if no column reports a result.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pExpr1[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool isMainOutput =
        !(functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);

    if (isMainOutput) {
      SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResultRow, col);
      assert(pCell != NULL);

      if (pCell->numOfRes > 0) {
        return pCell->numOfRes;
      }
    }
  }

  return 0;
}

2990
/*
 * Merge the result rows of all tables of one group, ordered by timestamp, into result pages of the
 * group.
 *
 * pQInfo  the query; pQInfo->groupResInfo receives the group id and the number of produced pages
 * pGroup  array of STableQueryInfo pointers that form the group
 *
 * Tables without result pages or without result rows are ignored. If only one table contributes,
 * nothing is merged: the group result simply refers to that table's pages. Otherwise a loser tree
 * picks the source with the smallest current timestamp; rows with the same timestamp as the previous
 * one are merged into the current output row, other rows start a new output row. The output buffers
 * are flushed to the result buffer whenever they are full and once more at the end.
 *
 * Returns the number of data pages of the group (0: the group has no result), or -1 if flushing to
 * the result buffer failed. Running out of memory or a killed query aborts via longjmp() to
 * pRuntimeEnv->env. All temporary allocations are released on every exit path.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that actually have results
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);
    return 0;
  }

  // no need to merge results since only one table in each group
  if (numOfTables == 1) {
    tfree(posList);
    tfree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {  // the tree is dereferenced below, so a failed creation must not go unnoticed
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  SResultRow* pRow = getNewResultRow(pRuntimeEnv->pool);
  resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      tfree(pTableList);
      tfree(posList);
      tfree(pTree);
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the root of the loser tree is the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SResultRow *    pWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
    tFilePage *     page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    bool    hasResult = (num > 0);

    if (hasResult) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

            // release the temporary buffers; this path used to return without freeing them
            tfree(pTree);
            tfree(pTableList);
            tfree(posList);
            return -1;
          }

          resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // move to the next element of current entry
    int32_t currentPageId = pWindowRes->pageId;

    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    } else if (hasResult) {
      // current page is not needed anymore
      SResultRow *pNextWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
      if (pNextWindowRes->pageId != currentPageId) {
        releaseResBufPage(pRuntimeEnv->pResultBuf, page);
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      tfree(pTree);
      tfree(pTableList);
      tfree(posList);
      return -1;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  tfree(pTableList);
  tfree(posList);
  tfree(pTree);

  return pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3159 3160
/*
 * Flush the rows buffered in pQuery->sdata[] into new pages of the disk-based result buffer.
 *
 * The sdata[0]->num buffered rows are cut into chunks of at most pResultBuf->numOfRowsPerPage rows.
 * For every chunk a new page is requested with getNewDataBuf() for pGroupResInfo->groupId, each
 * output column is copied into the page at its column offset, and pGroupResInfo->numOfDataPages
 * is increased by one.
 *
 * @param pRuntimeEnv    runtime environment supplying the query, the function contexts and the result buffer
 * @param pGroupResInfo  group result info: groupId is read, numOfDataPages is updated
 * @return               always TSDB_CODE_SUCCESS
 *
 * NOTE(review): the page returned by getNewDataBuf() is dereferenced without a NULL check --
 * confirm that getNewDataBuf() cannot fail.
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery     = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t rowsPerPage = pResultBuf->numOfRowsPerPage;
  int32_t pending     = (int32_t) pQuery->sdata[0]->num;  // rows not yet written to a page
  int32_t copied      = 0;                                // rows already written, i.e. the source row offset

  int32_t pageId = -1;  // out parameter of getNewDataBuf(), overwritten on every call

  while (pending > 0) {
    int32_t rows = pending;
    if (rows > rowsPerPage) {
      rows = rowsPerPage;
    }
    assert(rows > 0);

    // acquire the destination page for this chunk
    tFilePage *pPage = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    pPage->num = rows;

    // copy the chunk column by column
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = ((char *) pQuery->sdata[col]->data) + copied * bytes;
      char *dst = pPage->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;
      memcpy(dst, src, (size_t)(pPage->num * bytes));
    }

    copied  += rows;
    pending -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3197 3198 3199
/*
 * Reset the merge output buffers and re-bind every function context to them.
 *
 * For each output column the context's aOutputBuf is set one outputBytes slot before the start of
 * pQuery->sdata[k]->data, size is set to 1, startOffset to 0, resultInfo to the cell of pRow for
 * that column, and the row counter of sdata[k] is cleared.
 *
 * NOTE(review): aOutputBuf deliberately points one element before the buffer; presumably the
 * merge routine advances it before writing -- confirm against the caller.
 *
 * @param pRuntimeEnv  runtime environment (supplies the query and is passed to getResultCell())
 * @param pCtx         array of function contexts, one per output column
 * @param pRow         result row whose cells become the contexts' resultInfo
 */
void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t k = 0;
  while (k < pQuery->numOfOutput) {
    SQLFunctionCtx *pCur = &pCtx[k];

    pCur->aOutputBuf  = pQuery->sdata[k]->data - pCur->outputBytes;
    pCur->size        = 1;
    pCur->startOffset = 0;
    pCur->resultInfo  = getResultCell(pRuntimeEnv, pRow, k);

    pQuery->sdata[k]->num = 0;
    ++k;
  }
}

3210 3211 3212 3213
/*
 * Flip the scan state of one table so that it can be scanned in the opposite direction.
 *
 * If lastKey differs from win.skey, win.ekey is set to lastKey + step. After that skey and ekey
 * are swapped, lastKey is reset to the new skey, the cursor order is switched, cur.vgroupIndex is
 * reset to -1 and the window-result cursor is placed on the last entry.
 *
 * @param pQuery           query descriptor; only order.order is read. Per the original note the
 *                         order "has changed already" when this is called.
 * @param pTableQueryInfo  table state to update; NULL is accepted and ignored
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO (carried over from the original): validate with an assertion that win.ekey and
  // lastKey + step are ordered consistently with the scan direction.

  // when lastKey still equals win.skey no results were produced, so win.ekey is left untouched
  bool moved = (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey);
  if (moved) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // start from the last time window result
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

3241 3242
/*
 * Set the "complete" flag of every result cell that belongs to a closed window result.
 *
 * FIRST/FIRST_DST cells (when order is TSDB_ORDER_ASC) and LAST/LAST_DST cells (when order is
 * TSDB_ORDER_DESC) are marked as not complete; every other cell except TS and TAG is marked as
 * complete. TS and TAG cells are left unchanged. Windows that are not closed are skipped.
 *
 * @param pRuntimeEnv     runtime environment (supplies the query, passed to getResultCell())
 * @param pWindowResInfo  window results to walk
 * @param order           scan order to test the FIRST/LAST functions against
 */
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!getTimeWindowResStatus(pWindowResInfo, i)) {
      continue;  // window not closed
    }

    SResultRow *pRow = getResultRow(pWindowResInfo, i);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functId = pQuery->pExpr1[j].base.functionId;
      SResultRowCellInfo* pInfo = getResultCell(pRuntimeEnv, pRow, j);

      bool firstInAsc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
      bool lastInDesc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

      if (firstInAsc || lastInDesc) {
        pInfo->complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pInfo->complete = true;
      }
    }
  }
}

3267 3268
/*
 * Set the "complete" flag of the result cells according to the query order.
 *
 * For group-by-normal-column and interval queries the work is delegated to
 * disableFuncInReverseScanImpl() on the runtime environment's window results. Otherwise the
 * resultInfo of every function context is updated directly: FIRST/FIRST_DST (order ASC) and
 * LAST/LAST_DST (order DESC) become not complete, everything else except TS and TAG becomes
 * complete. Contexts without a resultInfo are skipped.
 *
 * @param pQInfo  query info that owns the runtime environment
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pRuntimeEnv, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple result of a table query; todo refactor: same rule as disableFuncInReverseScanImpl
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functId = pQuery->pExpr1[j].base.functionId;
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];

    if (pCtx->resultInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool firstInAsc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
    bool lastInDesc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

    if (firstInAsc || lastInDesc) {
      pCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Prepare every table of every table group for the reverse scan.
 *
 * Each STableQueryInfo is updated through updateTableQueryInfoForReverseScan(), and its resulting
 * lastKey is copied into the STableKeyInfo at the same position of pQInfo->tableGroupInfo. Per the
 * original note, the table key info is used to build the tsdb query handle and decides the start
 * check timestamp of that handle.
 *
 * @param pQInfo  query info holding the table groups and the runtime environment
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pInfoGroup = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyGroup  = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pInfoGroup);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(pInfoGroup, k);
      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // propagate the new start key to the key info entry of the same table
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyGroup, k);
      pKeyInfo->lastKey = pCheckInfo->lastKey;

      // both lists are expected to hold the tables in the same order
      assert(pCheckInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3318
/*
 * Apply SWITCH_ORDER to the order field of every function context of the query.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts (pCtx) and the query
 */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t i = 0; i < numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SWITCH_ORDER(pCtx->order);
  }
}

3325
/*
 * Initialise a result row: point pCellInfo at the memory immediately following the SResultRow
 * header and mark the row as not bound to any page/row position (pageId = rowId = -1).
 *
 * NOTE(review): this assumes the caller allocated the cell-info array contiguously after the
 * SResultRow -- confirm at the allocation site.
 *
 * @param pResultRow  row to initialise
 * @return            always TSDB_CODE_SUCCESS
 */
int32_t initResultRow(SResultRow *pResultRow) {
  pResultRow->pageId = -1;
  pResultRow->rowId  = -1;

  // the cell info starts right behind the row header
  pResultRow->pCellInfo = (SResultRowCellInfo*)(pResultRow + 1);

  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind all function contexts to the start of the query output buffers.
 *
 * For every output column: aOutputBuf is pointed at pQuery->sdata[i]->data, the result cell of
 * pRuntimeEnv->pResultRow for that column is reset and installed as resultInfo, TOP/BOTTOM/DIFF
 * contexts get their ptsOutputBuf pointed at the output buffer of column 0, and the column buffer
 * is zeroed for pExpr1[i].bytes * rec.capacity bytes. Finally initCtxOutputBuf() is called.
 *
 * @param pRuntimeEnv  runtime environment to reset
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery     *pQuery = pRuntimeEnv->pQuery;
  SResultRow *pRow   = pRuntimeEnv->pResultRow;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int32_t functionId = pQuery->pExpr1[i].base.functionId;

    pCtx->aOutputBuf = pQuery->sdata[i]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    SResultRowCellInfo* pCellInfo = getResultCell(pRuntimeEnv, pRow, i);
    RESET_RESULT_INFO(pCellInfo);
    pCtx->resultInfo = pCellInfo;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    bool needTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[i]->data, 0, (size_t)(pQuery->pExpr1[i].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of the function contexts by `output` rows and reset their result info.
 *
 * aOutputBuf is moved by outputBytes * output for functions whose aAggs status satisfies
 * IS_OUTER_FORWARD. For TOP/BOTTOM the timestamp output buffer (ptsOutputBuf) is moved by
 * TSDB_KEYSIZE * output as well. The DIFF function must not appear here (asserted).
 *
 * @param pRuntimeEnv  runtime environment holding the contexts
 * @param output       number of rows to move forward
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];

    int32_t functionId = pQuery->pExpr1[j].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query the timestamps of the first output column are assigned inside
     * the top/bottom routine through ptsOutputBuf, so that buffer has to be forwarded too.
     * The diff function is handled in the multi-output function.
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and run the aggregate's init() callback for
 * each context whose result info is not yet flagged as initialized.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
    int32_t functionId = pQuery->pExpr1[j].base.functionId;

    pCtx->currentStage = 0;

    SResultRowCellInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3404
/*
 * Apply the remaining OFFSET (pQuery->limit.offset) to the rows currently in the output buffer.
 *
 * Nothing happens when there are no rows or the offset is 0. If the offset covers all buffered
 * rows, the rows are dropped, the offset is reduced by their count, the output buffers are reset
 * and the QUERY_RESBUF_FULL status is cleared. Otherwise the first `offset` rows are removed by
 * moving the remaining rows to the front of every column buffer, the contexts' output pointers
 * are placed behind the remaining rows, and the offset becomes 0.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows > pQuery->limit.offset) {
    // only a prefix of the buffered rows has to be skipped
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, pQuery->rec.rows);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

      int32_t functionId = pQuery->pExpr1[i].base.functionId;
      int32_t bytes = pCtx->outputBytes;

      // the regions may overlap, hence memmove
      char *dst = (char*)pQuery->sdata[i]->data;
      memmove(pQuery->sdata[i]->data, dst + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
      pCtx->aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
    return;
  }

  // every buffered row is skipped
  qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
      pQuery->limit.offset - pQuery->rec.rows);

  pQuery->limit.offset -= pQuery->rec.rows;
  pQuery->rec.rows = 0;

  resetCtxOutputBuf(pRuntimeEnv);

  // clear the buffer full flag if exists
  CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
}

/*
 * Update the status flags of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed into the status after
 * the QUERY_NOT_COMPLETED flag has been cleared, because that flag is not compatible with any
 * other status.
 *
 * @param pQuery  query whose status is changed
 * @param status  status flag to set
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Run the xNextStep() callback of every output function (TS excluded) and report whether at
 * least one of them is still not complete.
 *
 * For group-by-normal-column and interval queries this is done for every closed window result,
 * after binding the contexts to that result with setResultOutputBuf(). Otherwise it is done once
 * on the current contexts.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return             true if any function's result info is not complete after xNextStep()
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    toContinue = false;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

    for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
      SResultRow *pResult = getResultRow(pWindowResInfo, i);
      if (!pResult->closed) {
        continue;
      }

      setResultOutputBuf(pRuntimeEnv, pResult);

      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        int16_t functId = pQuery->pExpr1[j].base.functionId;
        if (functId != TSDB_FUNC_TS) {
          SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
          aAggs[functId].xNextStep(pCtx);

          SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
          if (!pResInfo->complete) {
            toContinue = true;
          }
        }
      }
    }

    return toContinue;
  }

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int16_t functId = pQuery->pExpr1[j].base.functionId;
    if (functId != TSDB_FUNC_TS) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
      aAggs[functId].xNextStep(pCtx);

      SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
      if (!pResInfo->complete) {
        toContinue = true;
      }
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3500
/*
 * Take a snapshot of the current scan state.
 *
 * The snapshot holds the query status, the current window-result index, `start` as lastKey, a
 * copy of pQuery->window (w) and a copy of the current table's window whose skey is replaced by
 * `start` (curWindow). `start` must not be past the current table's lastKey in scan direction
 * (asserted).
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key of the scan
 * @return             the snapshot, by value
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pTableQueryInfo->lastKey);
  } else {
    assert(start >= pTableQueryInfo->lastKey);
  }

  SQueryStatusInfo info = {0};
  info.status      = pQuery->status;
  info.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  info.lastKey     = start;

  TIME_WINDOW_COPY(info.w, pQuery->window);
  TIME_WINDOW_COPY(info.curWindow, pTableQueryInfo->win);

  // the scan begins at `start`, not at the table window's own skey
  info.curWindow.skey = start;

  return info;
}

3521 3522 3523 3524
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * Steps, in order:
 *  1. save the ts-buffer cursor into pStatus->cur; if a ts buffer exists, switch its order and
 *     advance it to the next position (must succeed, asserted);
 *  2. set pQuery->window to pStatus->curWindow with skey/ekey swapped and switch the query order;
 *  3. set the REVERSE_SCAN flag, reset the query status to QUERY_NOT_COMPLETED, switch the order
 *     of the function contexts, update the functions' complete flags and the per-table ranges;
 *  4. clean up the existing secondary query handle, if any, and open a new one for the reversed
 *     window.
 *
 * Does not return normally if the new handle cannot be created: longjmp(pRuntimeEnv->env, terrno).
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pStatus      status snapshot; curWindow is read, cur is written
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch the window must run in the direction of the new order
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3570 3571
/*
 * Undo setEnvBeforeReverseScan(): switch the query order and the context orders back, restore
 * the saved ts-buffer cursor (and give the ts buffer the restored query order), set the
 * MASTER_SCAN flag, and restore lastKey, the query status and the time windows from pStatus.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pStatus      snapshot holding the values to restore (cur, lastKey, status, w)
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the progress and status recorded before the reverse scan
  pTableQueryInfo->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // restore the table window first, then take the query window from it
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

H
Haojun Liao 已提交
3592 3593 3594 3595 3596 3597 3598
/*
 * Set lastKey of the single table in pTableGroupInfo to the start key of the query condition.
 * Only valid when the group info holds exactly one table (asserted).
 *
 * @param pTableGroupInfo  table group info; the first entry of the first group is updated
 * @param pCond            query condition whose twindow.skey is used
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3599
/*
 * Scan the data blocks of the current table, repeating the scan while any function asks for it,
 * and finish with a reverse scan when the query needs one.
 *
 * The first pass runs with the MASTER_SCAN flag. After each pass needScanDataBlocksAgain()
 * decides whether another pass is needed; if so, a new secondary query handle is opened for the
 * window covered by the master scan and the pass is repeated with the REPEAT_SCAN flag. When
 * needReverseScan() holds, the environment is switched with setEnvBeforeReverseScan(), one more
 * pass is executed and the environment is restored with clearEnvAfterReverseScan().
 *
 * Does not return normally (longjmp on pRuntimeEnv->env) when a query handle cannot be created
 * or when the query has been killed.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key of the scan
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (qstatus.lastKey != pTableQueryInfo->lastKey) {
        qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    // another pass is required: rebuild the secondary handle on the window scanned so far
    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3679
/*
 * Run the xFinalize() callback of every output function.
 *
 * For group-by-normal-column and interval queries this is done per closed window result: the
 * contexts are bound to the result with setResultOutputBuf(), finalized, and the number of
 * output rows is stored in the result row. For group-by-normal-column queries all windows are
 * closed first. For other queries the callbacks are run once on the current contexts.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pExpr1[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    SResultRow *pRow = pWindowResInfo->pResult[i];
    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pExpr1[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows
     * usually is 1 except the top and bottom query
     */
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Tell whether the query has at least one output function other than TS, TAG and TAGPRJ.
 *
 * @param pQuery  query to inspect
 * @return        true if such a function exists, false otherwise
 */
static bool hasMainOutput(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;

    bool auxiliary = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }

    ++i;
  }

  return false;
}

H
Haojun Liao 已提交
3727
/*
 * Initialise a STableQueryInfo inside the caller-provided memory `buf`.
 *
 * The time window and table handle are stored, lastKey starts at win.skey and cur.vgroupIndex at
 * -1. For interval and group-by-normal-column queries the windowResInfo is initialised as well;
 * other aggregate queries leave it untouched.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table handle to store
 * @param win          query time window of the table
 * @param buf          memory in which the STableQueryInfo is set up
 * @return             `buf` as STableQueryInfo, or NULL if initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo  = buf;

  pInfo->pTable  = pTable;
  pInfo->win     = win;
  pInfo->lastKey = win.skey;

  pInfo->cur.vgroupIndex = -1;

  // only interval/groupby queries keep per-table window results
  bool needWindowRes = (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol);
  if (needWindowRes) {
    const int32_t initialSize      = 16;
    const int32_t initialThreshold = 100;

    if (initWindowResInfo(&pInfo->windowResInfo, initialSize, initialThreshold, TSDB_DATA_TYPE_INT) != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3752
/*
 * Release the resources referenced by a STableQueryInfo: its tag variant and its window result
 * info. The structure itself is not freed here. NULL is accepted and ignored.
 *
 * @param pTableQueryInfo  table query info to clean up
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3764
 * @param pDataBlockInfo
3765
 */
H
Haojun Liao 已提交
3766
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3767
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3768 3769 3770
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3771 3772
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3773 3774 3775 3776

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3777

H
Haojun Liao 已提交
3778 3779 3780
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3781

H
Haojun Liao 已提交
3782
  uint64_t uid = 0; // uid is always set to be 0
H
Haojun Liao 已提交
3783
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
3784
      sizeof(groupIndex), true, uid);
H
Haojun Liao 已提交
3785
  if (pResultRow == NULL) {
3786 3787
    return;
  }
3788

3789 3790 3791 3792
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
3793 3794
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3795 3796 3797 3798
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3799

H
Haojun Liao 已提交
3800 3801
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
3802
  setResultOutputBuf(pRuntimeEnv, pResultRow);
3803 3804 3805
  initCtxOutputBuf(pRuntimeEnv);
}

3806
/*
 * Point every function context at the storage of the given result row.
 *
 * The result buffer page of the row is fetched once. For each output column aOutputBuf is set to
 * the column's position inside that page, resultInfo to the row's cell for the column, and
 * TOP/BOTTOM/DIFF contexts get ptsOutputBuf set to the output buffer of column 0.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts and the result buffer
 * @param pResult      result row to bind the contexts to
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note (from the original): pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int32_t functionId = pQuery->pExpr1[i].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, pPage);

    bool needTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer,
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, i);
  }
}

H
Haojun Liao 已提交
3829
/*
 * Bind the function contexts to a window result row and initialise those that need it.
 *
 * For each output column resultInfo is set to the row's cell. Columns whose cell is both
 * initialized and complete are left otherwise untouched. For the remaining columns aOutputBuf is
 * set to the column's position in the row's result page, currentStage is reset to 0,
 * TOP/BOTTOM/DIFF contexts get ptsOutputBuf set to the output buffer of column 0, and init() is
 * run if the cell is not initialized yet.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts and the result buffer
 * @param pResult      window result row to bind the contexts to
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note (from the original): pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, i);

    bool finished = (pCtx->resultInfo->initialized && pCtx->resultInfo->complete);
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf   = getPosInResultPage(pRuntimeEnv, i, pResult, pPage);
    pCtx->currentStage = 0;

    int32_t functionId = pCtx->functionId;
    switch (functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3857
/*
 * Set the tag values for a table and, when a ts buffer exists, position the ts-buffer cursor
 * for that table.
 *
 * If the table has no saved cursor yet (cur.vgroupIndex == -1), the tag of the first context is
 * copied into the table info, the start position for (vgId, tag) is looked up in the ts buffer
 * and the resulting cursor is saved in the table info. Otherwise the saved cursor is restored
 * into the ts buffer. Both the master and the supplement scan need the correct start position.
 *
 * @param pQInfo           query info that owns the runtime environment
 * @param pTable           table handle passed to setTagVal()
 * @param pTableQueryInfo  per-table state; tag and cur are read and/or written
 * @return                 0 in all cases.
 *
 * NOTE(review): the lookup-failure path returns `false`, which is the same value (0) as the
 * success path, so callers cannot tell the two apart -- confirm whether this is intended.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  // binary/nchar tags are logged as strings, all other tags as 64-bit integers
  bool isStrTag = (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR);

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (isStrTag) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return false;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  }

  if (isStrTag) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3913
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3914 3915
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3916
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3917

3918 3919 3920
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3921
    pTableQueryInfo->win.skey = key;
3922
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3923

3924 3925 3926 3927 3928
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3929

3930 3931 3932 3933 3934 3935
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3936
    STimeWindow     w = TSWINDOW_INITIALIZER;
3937
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3938

H
Haojun Liao 已提交
3939 3940
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3941
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3942
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3943

3944 3945
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3946
        assert(win.ekey == pQuery->window.ekey);
3947
      }
3948

3949
      pWindowResInfo->prevSKey = w.skey;
3950
    }
3951

3952
    pTableQueryInfo->queryRangeSet = 1;
3953
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3954 3955 3956 3957
  }
}

/*
 * Return true when at least one output expression of the query uses a function whose status flags
 * contain TSDB_FUNCSTATE_NEED_TS; false otherwise.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;

    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    idx += 1;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded:
 * 1. the current table's lastKey or the query end key falls inside the block's time window, so the
 *    timestamps are needed to locate the precise position;
 * 2. one of the output functions requires timestamps (see requireTimestamp).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pInfo = pQuery->current;

  // the current progress key is located in this block
  if (pInfo->lastKey >= pBlockWin->skey && pInfo->lastKey <= pBlockWin->ekey) {
    return true;
  }

  // the end key of the query is located in this block
  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3981
/*
 * Copy rows of the closed window results in pResultInfo into the output buffers pQuery->sdata.
 *
 * The copy starts at the group recorded in pQInfo->groupIndex (counted from the front for TSDB_ORDER_ASC,
 * from the back otherwise) and at the row recorded in pQInfo->groupResInfo.rowId, and stops once
 * pQuery->rec.capacity rows have been produced. Both cursors are advanced so the next call continues
 * where this one stopped.
 *
 * Returns the number of rows copied by this call.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t      numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SResultRow **rows = pResultInfo->pResult;

  const bool ascending = (orderType == TSDB_ORDER_ASC);
  int32_t    step = ascending ? 1 : -1;
  int32_t    first = ascending ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  int32_t        copied = 0;

  for (int32_t i = first; (i >= 0) && (i < numOfClosed); i += step) {
    SResultRow *pRow = rows[i];

    // empty window result: move on to the next group
    if (pRow->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->rowId = 0;
      continue;
    }

    int32_t numToCopy = pRow->numOfRows - pGroupResInfo->rowId;
    int32_t srcRow = pGroupResInfo->rowId;

    if (numToCopy > pQuery->rec.capacity - copied) {
      // the remaining output space cannot hold the rest of this result: copy a part and remember the position
      numToCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->rowId += numToCopy;
    } else {
      // this result is consumed completely
      pGroupResInfo->rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pRow->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pRuntimeEnv, col, pRow, pPage);
      memcpy(dst, src + srcRow * width, width * numToCopy);
    }

    copied += numToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4058
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4059
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4060

4061
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4062
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4063

4064
  pQuery->rec.rows += numOfResult;
4065

4066
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4067 4068
}

H
Haojun Liao 已提交
4069
/*
 * For non-interval queries, raise numOfRows of every window result in pRuntimeEnv->windowResInfo to the
 * largest numOfRes found among its output cells. Cells that belong to TSDB_FUNC_TS, TSDB_FUNC_TAG and
 * TSDB_FUNC_TAGPRJ are not taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t row = 0; row < pRuntimeEnv->windowResInfo.size; ++row) {
    SResultRow *pRow = pRuntimeEnv->windowResInfo.pResult[row];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool ignored = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
      if (!ignored) {
        SResultRowCellInfo* pCellInfo = getResultCell(pRuntimeEnv, pRow, col);
        pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pCellInfo->numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4092
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4093
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4094
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4095
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4096

4097
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4098
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4099

H
Haojun Liao 已提交
4100
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4101
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4102
  } else {
4103
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4104 4105 4106
  }
}

H
Haojun Liao 已提交
4107
/*
 * Tell whether a table query still has results that were not returned to the client.
 *
 * - Once the limit is reached (limit > 0 and rec.total >= limit) there is nothing left.
 * - For a fill query (not a point interpolation query): true if the fill info still holds rows, or if the
 *   query is completed and filling up to the end of the query window yields rows.
 * - Otherwise: true if the query is completed, is a group-by-normal-column or interval query, and window
 *   results exist.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // rows already handed to the fill info must be filled and returned first
  int32_t numOfPending = taosNumOfRemainRows(pFillInfo);
  if (numOfPending > 0) {
    return true;
  }

  /*
   * Nothing is pending here. While the query is still running, the gap between two result blocks is
   * handled after the next data block has been retrieved, so nothing remains for now.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  // the query is completed: check whether filling up to the end of the query window produces rows
  int32_t numOfFilled = (int32_t)getNumOfResWithFill(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfFilled > 0;
}

H
Haojun Liao 已提交
4148 4149 4150 4151
/* Number of columns in the final result: numOfExpr2 when pExpr2 is present, numOfOutput otherwise. */
static int16_t getNumOfFinalResCol(SQuery* pQuery) {
  if (pQuery->pExpr2 != NULL) {
    return pQuery->numOfExpr2;
  }

  return pQuery->numOfOutput;
}

4152
/*
 * Serialize the current result set into the response buffer `data` and update the query status.
 *
 * Layout written to `data`:
 *   1. for every final result column (pExpr2 if present, pExpr1 otherwise): bytes * numOfRows bytes copied
 *      from pQuery->sdata[col];
 *   2. the number of entries in pQInfo->arrTableIdInfo as a 32-bit integer converted with htonl();
 *   3. one STableIdInfo per entry, with uid/key converted by htobe64() and tid by htonl().
 *
 * The table count and the table entries follow variable-width column data, so their position in `data` is
 * not guaranteed to be suitably aligned. They are therefore stored with memcpy() instead of through casted
 * pointers. Only the uid, tid and key members of each entry are written, at their offsets in STableIdInfo.
 *
 * After the copy, a completed query is marked QUERY_OVER when the super table query is over, or, for a
 * table query, when no results remain.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy from every column
 * @param data       destination buffer; the visible code does not check its size, the caller must provide
 *                   enough space for the layout above
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // the second-stage expressions, when present, describe the final output columns
  SExprInfo *pExprs = (pQuery->pExpr2 == NULL) ? pQuery->pExpr1 : pQuery->pExpr2;
  int32_t    numOfCols = (pQuery->pExpr2 == NULL) ? pQuery->numOfOutput : pQuery->numOfExpr2;

  for (int32_t col = 0; col < numOfCols; ++col) {
    int32_t bytes = pExprs[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);

  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    // convert into a local copy first, then store member by member at an arbitrary alignment
    STableIdInfo net;
    net.uid = htobe64(pSrc->uid);
    net.tid = htonl(pSrc->tid);
    net.key = htobe64(pSrc->key);

    memcpy(data + offsetof(STableIdInfo, uid), &net.uid, sizeof(net.uid));
    memcpy(data + offsetof(STableIdInfo, tid), &net.tid, sizeof(net.tid));
    memcpy(data + offsetof(STableIdInfo, key), &net.key, sizeof(net.key));

    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  if (pQInfo->runtimeEnv.stableQuery) {
    if (IS_STASBLE_QUERY_OVER(pQInfo)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  }
}

H
Haojun Liao 已提交
4197
/*
 * Generate filled result rows into pQuery->sdata and apply the pending row offset (pQuery->limit.offset).
 *
 * Each round produces at most pQuery->rec.capacity rows:
 *  - offset is 0: the rows are returned as they are;
 *  - offset is smaller than the number of rows: the leading `offset` rows are dropped by shifting the
 *    column data in pDst, the offset becomes 0 and the remaining row count is returned;
 *  - otherwise: all rows are consumed by the offset, and another round is run as long as the query still
 *    has results; if it has none, 0 is returned.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         column buffers that are shifted when part of a batch is discarded
 * @param numOfFilled  not used by this function
 * @return number of rows available after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = (int32_t)taosFillResultDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset left to skip, everything generated is returned
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset >= ret) {
      // the whole batch is swallowed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
          pQuery->limit.offset - ret);

      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    // only the head of the batch is discarded
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

    ret -= (int32_t)pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SExprInfo *pExpr = &pQuery->pExpr1[col];
      memmove(pDst[col]->data, pDst[col]->data + pExpr->bytes * pQuery->limit.offset, ret * pExpr->bytes);
    }

    pQuery->limit.offset = 0;
    return ret;
  }
}

4241
/*
 * Finalize the cost summary of a query and write it to the debug log.
 *
 * hashSize is the memory size of the result-row hash table plus that of the table-query-info map, the
 * first stage merge time is added to the elapsed time, and the result row pool provides winInfoSize and
 * numOfTimeWindows.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  // memory held by the hash tables
  uint64_t memOfHash = taosHashGetMemSize(pRuntimeEnv->pResultRowHashTable);
  memOfHash += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pCost->hashSize = memOfHash;

  // add the merge time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  // memory and number of rows handed out by the result row pool
  SResultRowPool* pPool = pRuntimeEnv->pool;
  pCost->winInfoSize = getResultRowPoolMemSize(pPool);
  pCost->numOfTimeWindows = getNumOfAllocatedResultRows(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pCost->winInfoSize/1024.0,
      pCost->numOfTimeWindows, pCost->tableInfoSize/1024.0, pCost->hashSize/1024.0);
}

4265 4266
/*
 * Consume the remaining row offset (pQuery->limit.offset) inside the given data block.
 *
 * When the offset equals the number of rows, the whole block is skipped: lastKey is moved one step past
 * the block boundary in scan direction and the offset is cleared. Otherwise pQuery->pos is set to the
 * first row to be processed, lastKey becomes the timestamp of that row, the offset is cleared and the
 * query functions are applied to the rest of the block.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset covers the block exactly: ignore the block completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pInfo->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pInfo->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  // position of the first row that survives the offset, counted in scan direction
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // column 0 is read as the timestamp column: continue from the timestamp at the start position
  TSKEY *tsList = (TSKEY *) pTsCol->pData;
  pInfo->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4300

4301 4302 4303 4304 4305
/*
 * Skip data blocks until the row offset of the query (pQuery->limit.offset) is used up.
 *
 * Nothing is done when there is no positive offset or when the query has column filters. Blocks with fewer
 * rows than the remaining offset are skipped as a whole; the first block that does not have fewer rows is
 * handed to updateOffsetVal(). The function leaves through longjmp(pRuntimeEnv->env, ...) when the query
 * is killed or when terrno is set after the scan.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool hasOffset = pQuery->limit.offset > 0;
  if (!hasOffset || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pInfo = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the start position is located in this block
    if (!(pQuery->limit.offset > blockInfo.rows)) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the block is smaller than the remaining offset: skip it completely
    pQuery->limit.offset -= blockInfo.rows;
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pInfo->lastKey = blockInfo.window.ekey;
    } else {
      pInfo->lastKey = blockInfo.window.skey;
    }
    pInfo->lastKey += step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4339

H
Haojun Liao 已提交
4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389
/*
 * Position an interval query behind the time windows that were skipped because of the row offset.
 * It is called once the offset has reached 0 (asserted).
 *
 * The window that follows `win` is computed first:
 *  - If it does not start inside the current data block, only pQuery->window.skey and
 *    windowResInfo.prevSKey are moved to the start of that window, and that start is returned.
 *  - Otherwise the data block is loaded, the start position of the next qualified window inside the block
 *    is located, the query/table start keys are reset to the timestamp at that position, and the query
 *    functions are applied to the block. windowResInfo.curIndex is restored afterwards. The timestamp at
 *    the start position is returned.
 *
 * @param pRuntimeEnv      runtime environment of the query
 * @param win              last skipped time window
 * @param pBlockInfo       information of the current data block
 * @param pTableQueryInfo  query info of the table being scanned
 * @return the key from which the query continues
 */
static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  assert(pQuery->limit.offset == 0);
  STimeWindow tw = *win;
  getNextTimeWindow(pQuery, &tw);

  bool nextWindowInBlock = (tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
                           (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery));

  if (!nextWindowInBlock) {
    // the next window belongs to a following block: only move the start keys
    pQuery->window.skey = tw.skey;
    pWindowResInfo->prevSKey = tw.skey;

    return tw.skey;
  }

  // load the data block and check data remaining in current data block
  // TODO optimize performance
  SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

  tw = *win;
  int32_t startPos =
      getNextQualifiedWindow(pRuntimeEnv, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
  assert(startPos >= 0);

  // set the abort info
  pQuery->pos = startPos;

  // reset the query start timestamp
  pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
  pQuery->window.skey = pTableQueryInfo->win.skey;
  TSKEY key = pTableQueryInfo->win.skey;

  pWindowResInfo->prevSKey = tw.skey;

  // remember the window index so it can be restored after the block has been processed
  int32_t index = pRuntimeEnv->windowResInfo.curIndex;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
  pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
         GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
         pQuery->current->lastKey);

  return key;
}

H
Haojun Liao 已提交
4390
/*
 * Skip pQuery->limit.offset time windows of an interval query before results are produced.
 *
 * *start is initialised with the lastKey of the current table. Nothing is skipped when the query has no
 * positive offset, has column filters, uses a ts buffer or uses fill. Otherwise data blocks are scanned
 * and the offset is decreased window by window; when it reaches 0, doSkipIntervalProcess() positions the
 * query and its result is stored in *start.
 *
 * Since holes may exist in the data, the windows are walked one at a time instead of jumping forward by
 * interval * offset.
 *
 * The function returns true on every return path; it leaves through longjmp(pRuntimeEnv->env, terrno)
 * when terrno is set after the scan.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  bool skippable = (pQuery->limit.offset > 0) && !(pQuery->numOfFilterCols > 0) &&
                   (pRuntimeEnv->pTSBuf == NULL) && (pRuntimeEnv->pFillInfo == NULL);
  if (!skippable) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow aligned = TSWINDOW_INITIALIZER;

  SWindowResInfo  *pResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pInfo = pQuery->current;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pResInfo->startTime = pQuery->window.skey;
      pResInfo->prevSKey = aligned.skey;
    } else if (pResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);
      pResInfo->startTime = aligned.skey;
      pResInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pResInfo, pResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      STimeWindow next = win;

      // the window is closed inside the current block: it counts as one skipped window
      bool closedInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (win.ekey <= blockInfo.window.ekey)
                                                      : (win.ekey >= blockInfo.window.skey);
      if (closedInBlock) {
        pQuery->limit.offset -= 1;
        pResInfo->prevSKey = win.skey;
      }

      // current window does not ended in current data block, try next data block
      getNextTimeWindow(pQuery, &next);
      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pInfo);
        return true;
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (next.skey <= blockInfo.window.ekey)
                                                    : (next.ekey >= blockInfo.window.skey);
      if (!nextInBlock) {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      // the window reaches beyond the block: it was not counted above
      bool beyondBlock = QUERY_IS_ASC_QUERY(pQuery) ? (win.ekey > blockInfo.window.ekey)
                                                    : (win.ekey < blockInfo.window.skey);
      if (beyondBlock) {
        pQuery->limit.offset -= 1;
      }

      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pInfo);
        return true;
      }

      next = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      pInfo->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pResInfo->prevSKey = next.skey;
      win = next;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4493 4494
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4495
/*
 * Create the tsdb query handle of a query and store it in pRuntimeEnv->pQueryHandle.
 *
 * No handle is created (TSDB_CODE_SUCCESS is returned immediately) for queries that only read tags and
 * for super table queries that are neither interval queries nor fixed-output queries.
 *
 * The scan window is the query window, except for an ascending single-table query that is not an
 * interval, group-by-normal-column or fixed-output query: it uses the window of that table.
 *
 * For a first/last row query the query window is replaced by the window in `cond` after the handle has
 * been created, and every table query info is reset to that window.
 *
 * @return terrno, which is cleared before the handle is created
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // a plain ascending scan of exactly one table uses the time window of that table
  bool singleTableScan = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv));

  if (singleTableScan) {
    SArray* pGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pOnlyTable = taosArrayGetP(pGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    } else {
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    }

    return terrno;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the updated window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pTables);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTables, k);

      pTableInfo->win = pQuery->window;
      pTableInfo->lastKey = pTableInfo->win.skey;
    }
  }

  return terrno;
}

H
Haojun Liao 已提交
4559
/*
 * Build the column descriptors handed to the fill module.
 *
 * One SFillColInfo entry is created per final result column. The source
 * expressions are taken from pQuery->pExpr2 when it is set, otherwise from
 * pQuery->pExpr1. Columns are packed back to back: the offset of a column is
 * the sum of the byte widths of all columns before it.
 *
 * Returns the calloc'ed descriptor array, or NULL if the allocation fails.
 */
static SFillColInfo* createFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = getNumOfFinalResCol(pQuery);

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  // the expression list does not change while iterating, so pick it once
  SExprInfo* pExprList = (pQuery->pExpr2 != NULL) ? pQuery->pExpr2 : pQuery->pExpr1;

  int32_t rowOffset = 0;
  for (int32_t k = 0; k < numOfCols; ++k) {
    SExprInfo*    pExpr = &pExprList[k];
    SFillColInfo* pCol = &pFillCol[k];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->tagIndex   = -2;
    pCol->flag       = TSDB_COL_NORMAL;  // always treated as a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[k];

    rowOffset += pExpr->bytes;
  }

  return pFillCol;
}

4586
/**
 * Initialize the runtime environment of a query before it is executed.
 *
 * Steps, in order:
 *   1. cache query traits (top/bottom query, tag output) in the runtime env;
 *   2. open the tsdb query handle through setupQueryHandle();
 *   3. record tsdb/vgId and the ts buffer, and set its traverse order;
 *   4. build the function contexts through setupQueryRuntimeEnv();
 *   5. create the disk based result buffer and the window result info when the
 *      query is a super table query (not tag-only), a group-by on a normal
 *      column, or an interval query;
 *   6. create the fill info when a fill type is set and the query is not a
 *      point interpolation query.
 *
 * @param pQInfo         query info object that owns the runtime environment
 * @param pTsBuf         ts buffer, may be NULL
 * @param tsdb           tsdb handle passed through to the storage layer
 * @param vgId           vnode group id stored in pQInfo
 * @param isSTableQuery  true when the query targets a super table
 * @return TSDB_CODE_SUCCESS, or the first error code reported by a setup step
 *
 * On failure nothing is released here; NOTE(review): the caller is presumably
 * responsible for tearing down pQInfo -- confirm against the call site.
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, 1024, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = createFillColInfo(pQuery);
    if (pColInfo == NULL) {
      // createFillColInfo() returns NULL only when its allocation fails; do not
      // hand a NULL column list to the fill module
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    // the query window may be stored in descending order, normalize it first
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    int32_t numOfCols = getNumOfFinalResCol(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, numOfCols,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo, pQInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4692
/*
 * Clear the "complete" flag of every output function's result cell, so the
 * functions accept input again when the next table is processed.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    if (pCell == NULL) {
      continue;  // no result cell attached to this context
    }

    pCell->complete = false;
  }
}

H
Haojun Liao 已提交
4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719
/*
 * Prepare the execution environment for one data block of a table.
 *
 * Interval query: the query range is set from the block's start key, and the
 * additional (tag / ts buffer) info is set when the query outputs tags or uses
 * a ts buffer.
 * Any other query: the execution context of the table's group is set, using
 * the key one step past the block's end key.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + direction);
}

H
Haojun Liao 已提交
4720
/*
 * Iterate over all data blocks delivered by the active query handle (the
 * primary handle during the master scan, the secondary handle otherwise) and
 * apply the query functions to each of them.
 *
 * The loop stops early when the owner table of a block is not found in the
 * table map or when loading a block fails. The function leaves through
 * longjmp(pRuntimeEnv->env, ...) when the query has been killed or when
 * terrno is set after the loop.
 *
 * Returns the elapsed time, computed from taosGetTimestampMs().
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo *  pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  int32_t        direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // locate the per-table query info of the table that owns this block
    STableQueryInfo **ppInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppInfo == NULL) {
      break;
    }

    STableQueryInfo *pInfo = *ppInfo;
    pQuery->current = pInfo;

    // sanity check: the table window lies inside the query window and follows the scan direction
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert((pInfo->win.skey <= pInfo->win.ekey) && (pInfo->lastKey >= pInfo->win.skey) &&
             (pInfo->win.skey >= pQuery->window.skey && pInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert((pInfo->win.skey >= pInfo->win.ekey) && (pInfo->lastKey <= pInfo->win.skey) &&
             (pInfo->win.skey <= pQuery->window.skey && pInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pInfo, &blockInfo);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pStatis = NULL;
    SArray *     pDataBlock = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pHandle, &blockInfo, &pStatis,
                                         &pDataBlock, &blockStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // the block is skipped entirely, only move the last key of the table past it
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + direction;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4795 4796
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4797
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4798

4799
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
4800
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
4801
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
4802

H
Haojun Liao 已提交
4803 4804 4805
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
4806

H
Haojun Liao 已提交
4807
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
4808
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
4809
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
4810

4811
  STsdbQueryCond cond = {
4812
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
4813 4814
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
4815
      .numOfCols = pQuery->numOfCols,
4816
  };
4817

H
hjxilinx 已提交
4818
  // todo refactor
4819
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
4820 4821 4822 4823
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
4824

4825
  taosArrayPush(g1, &tx);
4826
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
4827

4828
  // include only current table
4829 4830 4831 4832
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
4833

H
Haojun Liao 已提交
4834
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
4835 4836
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
4837 4838 4839
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
4840

4841
  if (pRuntimeEnv->pTSBuf != NULL) {
H
Haojun Liao 已提交
4842 4843
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

4844
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
4845
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4846
      // failed to find data with the specified tag value and vnodeId
4847
      if (!tsBufIsValidElem(&elem)) {
H
Haojun Liao 已提交
4848 4849 4850 4851 4852 4853
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

4854
        return false;
H
Haojun Liao 已提交
4855 4856
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4857 4858 4859 4860 4861 4862 4863 4864

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
4865 4866
      }
    } else {
H
Haojun Liao 已提交
4867
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4868
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
4869

H
Haojun Liao 已提交
4870
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4871
        // failed to find data with the specified tag value and vnodeId
4872
        if (!tsBufIsValidElem(&elem1)) {
H
Haojun Liao 已提交
4873 4874 4875 4876 4877
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
4878

H
Haojun Liao 已提交
4879
          return false;
H
Haojun Liao 已提交
4880 4881
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4882 4883 4884 4885 4886
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
4887
        }
H
Haojun Liao 已提交
4888

H
Haojun Liao 已提交
4889 4890
      } else {
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
H
Haojun Liao 已提交
4891
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4892 4893 4894 4895 4896
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
4897
      }
4898 4899
    }
  }
4900

4901
  initCtxOutputBuf(pRuntimeEnv);
4902 4903 4904 4905 4906 4907 4908 4909 4910 4911
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4912
static void sequentialTableProcess(SQInfo *pQInfo) {
4913
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4914
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4915
  setQueryStatus(pQuery, QUERY_COMPLETED);
4916

H
Haojun Liao 已提交
4917
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4918

4919
  if (isPointInterpoQuery(pQuery)) {
4920 4921
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4922

4923
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4924
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4925

S
TD-1057  
Shengliang Guan 已提交
4926
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4927
             numOfGroups, group);
H
Haojun Liao 已提交
4928 4929 4930 4931 4932 4933 4934

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4935 4936
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4937 4938 4939
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4940

H
Haojun Liao 已提交
4941 4942 4943 4944 4945 4946 4947
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4948

H
Haojun Liao 已提交
4949
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
4950 4951 4952 4953 4954 4955

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4956

H
Haojun Liao 已提交
4957
      initCtxOutputBuf(pRuntimeEnv);
4958

4959
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4960
      assert(taosArrayGetSize(s) >= 1);
4961

4962
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
4963
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4964

H
Haojun Liao 已提交
4965
      // here we simply set the first table as current table
4966 4967 4968
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4969
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4970

H
Haojun Liao 已提交
4971 4972 4973 4974 4975
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4976

H
Haojun Liao 已提交
4977 4978 4979 4980 4981
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4982 4983 4984 4985 4986 4987

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4988
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4989
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4990
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4991

S
TD-1057  
Shengliang Guan 已提交
4992
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4993 4994 4995 4996 4997 4998 4999

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
5000 5001
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5014
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
5015
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
5016

B
Bomin Zhang 已提交
5017 5018
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5019 5020 5021
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5022

5023
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5024 5025
      assert(taosArrayGetSize(s) >= 1);

5026
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5027 5028 5029 5030 5031 5032 5033 5034

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
5035
      taosArrayDestroy(s);
5036 5037 5038 5039 5040
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5041
        pWindowResInfo->pResult[i]->closed = true; // enable return all results for group by normal columns
5042

H
Haojun Liao 已提交
5043
        SResultRow *pResult = pWindowResInfo->pResult[i];
5044
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5045 5046
          SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pResult, j);
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
5047 5048 5049
        }
      }

5050
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5051 5052 5053 5054 5055 5056 5057
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5058
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5059 5060 5061 5062 5063 5064

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5065 5066 5067
    }
  } else {
    /*
5068
     * 1. super table projection query, 2. ts-comp query
5069 5070 5071
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5072
    if (pQInfo->groupIndex > 0) {
5073
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5074
      pQuery->rec.total += pQuery->rec.rows;
5075

5076
      if (pQuery->rec.rows > 0) {
5077 5078 5079
        return;
      }
    }
5080

5081
    // all data have returned already
5082
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5083 5084
      return;
    }
5085

5086 5087
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5088

H
Haojun Liao 已提交
5089
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5090 5091
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5092

5093
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5094
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5095
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5096
      }
5097

5098
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5099
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5100
        pQInfo->tableIndex++;
5101 5102
        continue;
      }
5103

H
hjxilinx 已提交
5104
      // TODO handle the limit offset problem
5105
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5106 5107
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5108 5109 5110
          continue;
        }
      }
5111

5112
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5113
      skipResults(pRuntimeEnv);
5114

5115
      // the limitation of output result is reached, set the query completed
5116
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5117
        SET_STABLE_QUERY_OVER(pQInfo);
5118 5119
        break;
      }
5120

5121 5122
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5123

5124
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5125 5126 5127 5128 5129 5130
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5131
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5132

H
Haojun Liao 已提交
5133
        STableIdInfo tidInfo = {0};
5134

H
Haojun Liao 已提交
5135 5136 5137
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5138
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5139 5140
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5141
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5142
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5143 5144
          break;
        }
5145

H
Haojun Liao 已提交
5146 5147 5148 5149
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5150
      } else {
5151
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5152 5153
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5154 5155
          continue;
        } else {
5156 5157 5158
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5159 5160 5161
        }
      }
    }
H
Haojun Liao 已提交
5162

5163
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5164 5165
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5166
  }
5167

5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5180
    finalizeQueryResult(pRuntimeEnv);
5181
  }
5182

5183 5184 5185
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5186

5187
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5188 5189
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5190
      pQuery->limit.offset);
5191 5192
}

5193 5194 5195 5196
/*
 * Switch the runtime environment from the master scan to the reverse scan:
 *   - set the reverse scan flag, swap the start/end key of the query window
 *     and switch the query order (and the order of the ts buffer cursor);
 *   - switch the order of the function contexts, disable the functions that
 *     are not executed in the reverse scan and set up the reverse query range;
 *   - open the secondary query handle on all table groups with the reversed
 *     window and order.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when the secondary query
 * handle cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // the condition is built after the swap, so it carries the reversed window and order
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    // Do not keep a dangling handle while the calls below run: should any of
    // them abort the query, a later cleanup must not release the handle twice.
    // This mirrors how pQueryHandle is reset after cleanup in this file.
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5230 5231 5232 5233
/*
 * Leave the reverse scan: swap the start/end key of the query window back,
 * switch the order of the ts buffer cursor (when a ts buffer exists) and of
 * the function contexts, and set the master scan flag again.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  STSBuf *          pTsBuf = pEnv->pTSBuf;

  SWAP(pEnv->pQuery->window.skey, pEnv->pQuery->window.ekey, TSKEY);

  if (pTsBuf != NULL) {
    SWITCH_ORDER(pTsBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5244 5245 5246
/*
 * Close all window results once a scan has finished.
 *
 * For an interval query the window results kept by every table of every table
 * group are closed; for any other query only the window results of the runtime
 * environment (the group results) are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pTables);

    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Run a query over multiple tables: master scan, optional reverse scan, and
 * finally copy the produced results into the output buffer.
 *
 * When pQInfo->groupIndex is already positive no scan is performed; only the
 * remaining results are copied out. If an error code is set on pQInfo or the
 * query has been killed, execution is aborted via longjmp on the runtime
 * environment's jump buffer.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  // groupIndex > 0: the scan has been done before, just copy the data into the output buffer
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(query)) {
      copyFromWindowResToSData(pQInfo, &env->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, query);
#ifdef _DEBUG_VIEW
      displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         query->window.skey, query->window.ekey, query->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // an error occurred or the query was killed during the master scan: abort
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(env->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(query)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(query, QUERY_COMPLETED);

  // same abort check again, now covering the reverse scan as well
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    //TODO finalizeQueryResult may cause SEGSEV, since the memory may not allocated yet, add a cleanup function instead
    longjmp(env->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  bool mergeGroups = QUERY_IS_INTERVAL_QUERY(query) || isSumAvgRateQuery(query);
  if (!mergeGroups) {  // not an interval query
    copyFromWindowResToSData(pQInfo, &env->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, query);

#ifdef _DEBUG_VIEW
    displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, query->rec.rows, query->rec.total + query->rec.rows);
}

H
Haojun Liao 已提交
5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353

static char *getArithemicInputSrc(void *param, const char *name, int32_t colId) {
  SArithmeticSupport *pSupport = (SArithmeticSupport *) param;
  SExprInfo* pExprInfo = (SExprInfo*) pSupport->exprList;

  int32_t index = -1;
  for (int32_t i = 0; i < pSupport->numOfCols; ++i) {
    if (colId == pExprInfo[i].base.resColId) {
      index = i;
      break;
    }
  }

  assert(index >= 0 && index < pSupport->numOfCols);
  return pSupport->data[index] + pSupport->offset * pExprInfo[index].bytes;
}

/*
 * Evaluate the second-stage expressions (pQuery->pExpr2) on top of the
 * first-stage results currently stored in pQuery->sdata, and write the
 * outcome back into pQuery->sdata. Does nothing when there is no second-stage
 * expression.
 *
 * The results are built in temporary buffers first, because the first-stage
 * columns in pQuery->sdata are still the input of the arithmetic expressions
 * while they are being evaluated.
 *
 * NOTE(review): none of the calloc/malloc results below is checked, and a
 * non-arithmetic expression without a matching first-stage column leaves its
 * temporary buffer uninitialized before it is copied back -- confirm whether
 * callers guarantee these cases cannot happen.
 */
static void doSecondaryArithmeticProcess(SQuery* pQuery) {
  if (pQuery->numOfExpr2 == 0) {
    return;
  }

  // one temporary result buffer per second-stage expression
  tFilePage **tmpCols = calloc(pQuery->numOfExpr2, POINTER_BYTES);
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    int32_t width = pQuery->pExpr2[i].bytes;
    tmpCols[i] = (tFilePage *)malloc(width * pQuery->rec.rows + sizeof(tFilePage));
  }

  // the arithmetic support exposes the first-stage output columns as input
  SArithmeticSupport arithSup = {0};
  arithSup.offset    = 0;
  arithSup.numOfCols = (int32_t)pQuery->numOfOutput;
  arithSup.exprList  = pQuery->pExpr1;
  arithSup.data      = calloc(arithSup.numOfCols, POINTER_BYTES);

  for (int32_t k = 0; k < arithSup.numOfCols; ++k) {
    arithSup.data[k] = pQuery->sdata[k]->data;
  }

  for (int i = 0; i < pQuery->numOfExpr2; ++i) {
    SExprInfo *  expr2 = &pQuery->pExpr2[i];
    SSqlFuncMsg *func = &expr2->base;

    if (func->functionId == TSDB_FUNC_ARITHM) {
      // calculate the result from several other columns
      arithSup.pArithExpr = expr2;
      tExprTreeCalcTraverse(arithSup.pArithExpr->pExpr, (int32_t)pQuery->rec.rows, tmpCols[i]->data, &arithSup, TSDB_ORDER_ASC,
                            getArithemicInputSrc);
      continue;
    }

    // plain column: copy the first-stage column with the same function id and column id
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      SSqlFuncMsg *src = &pQuery->pExpr1[j].base;
      if (func->functionId != src->functionId || func->colInfo.colId != src->colInfo.colId) {
        continue;
      }

      memcpy(tmpCols[i]->data, pQuery->sdata[j]->data, pQuery->pExpr1[j].bytes * pQuery->rec.rows);
      break;
    }
  }

  // publish the second-stage results and release the temporary buffers
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    memcpy(pQuery->sdata[i]->data, tmpCols[i]->data, pQuery->pExpr2[i].bytes * pQuery->rec.rows);
  }

  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    tfree(tmpCols[i]);
  }

  tfree(tmpCols);
  tfree(arithSup.data);
}

5407 5408 5409 5410 5411 5412
/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5413
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5414
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5415

H
hjxilinx 已提交
5416
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5417
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5418 5419
    return;
  }
5420

H
hjxilinx 已提交
5421
  pQuery->current = pTableInfo;  // set current query table info
5422

5423
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5424
  finalizeQueryResult(pRuntimeEnv);
5425

H
Haojun Liao 已提交
5426 5427 5428 5429
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
  doSecondaryArithmeticProcess(pQuery);

H
Haojun Liao 已提交
5430
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5431
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5432
  }
5433

5434
  skipResults(pRuntimeEnv);
5435
  limitResults(pRuntimeEnv);
5436 5437
}

H
hjxilinx 已提交
5438
/*
 * Process a single-table query whose output is produced batch by batch.
 *
 * Scans the table repeatedly until either some rows are produced or the
 * query is completed, applies offset/limit, and, when the query is completed,
 * records the table id together with its last key in pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  query->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(query)) {
    resetCtxOutputBuf(env);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(env);
  if (query->limit.offset > 0 && query->numOfFilterCols == 0) {
    setQueryStatus(query, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(env, query->current->lastKey);
    finalizeQueryResult(env);

    query->rec.rows = getNumOfResult(env);

    bool pendingOffset = query->limit.offset > 0 && query->numOfFilterCols > 0;
    if (pendingOffset && query->rec.rows > 0) {
      skipResults(env);
    }

    // stop as soon as rows are available or nothing is left to scan;
    // otherwise the whole batch was consumed by the offset and another scan is needed
    if (query->rec.rows > 0 || Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, query->limit.offset, query->current->lastKey, query->current->win.ekey);

    resetCtxOutputBuf(env);
  }

  limitResults(env);

  if (Q_STATUS_EQUAL(query->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        query->current->lastKey, query->window.ekey);
  } else if (Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
    // remember the table id and the last key reached for this table
    STableId *   tableId = TSDB_TABLEID(query->current->pTable);
    STableIdInfo tidInfo;

    tidInfo.uid = tableId->uid;
    tidInfo.tid = tableId->tid;
    tidInfo.key = query->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(query)) {
    assert(query->rec.rows <= query->rec.capacity);
  }
}

H
Haojun Liao 已提交
5498
/*
 * Scan the current table starting at `start` until the query status is either
 * QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * After each scan, when no fill is requested and a filter or timestamp buffer
 * is in use, closed time windows are dropped from the front of the window
 * result info to consume the pending limit offset.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *query = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(query->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool filtered = (query->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (filtered && query->limit.offset > 0 && query->fillType == TSDB_FILL_NONE) {
      int32_t closed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // never drop more windows than the remaining offset
      int32_t dropped = (int32_t)(MIN(closed, query->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, dropped);
      query->limit.offset -= dropped;
    }
  } while (!Q_STATUS_EQUAL(query->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5525
// handle time interval query on table
H
hjxilinx 已提交
5526
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5527 5528
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5529 5530
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5531

H
Haojun Liao 已提交
5532
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5533
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5534

5535
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5536
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5537
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5538 5539 5540 5541
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5542
  while (1) {
H
Haojun Liao 已提交
5543
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5544

H
Haojun Liao 已提交
5545
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5546
      pQInfo->groupIndex = 0;  // always start from 0
5547
      pQuery->rec.rows = 0;
5548
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5549

5550
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5551
    }
5552

H
Haojun Liao 已提交
5553 5554 5555 5556 5557 5558 5559
    // no result generated, abort
    if (pQuery->rec.rows == 0) {
      break;
    }

    doSecondaryArithmeticProcess(pQuery);
    
5560
    // the offset is handled at prepare stage if no interpolation involved
H
Haojun Liao 已提交
5561
    if (pQuery->fillType == TSDB_FILL_NONE) {
5562
      limitResults(pRuntimeEnv);
5563 5564
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5565
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
H
Haojun Liao 已提交
5566
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (const tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5567
      numOfFilled = 0;
5568

H
Haojun Liao 已提交
5569
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5570
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5571
        limitResults(pRuntimeEnv);
5572 5573
        break;
      }
5574

5575
      // no result generated yet, continue retrieve data
5576
      pQuery->rec.rows = 0;
5577 5578
    }
  }
5579

5580
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5581
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5582
    pQInfo->groupIndex = 0;
5583
    pQuery->rec.rows = 0;
5584
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5585
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5586 5587 5588
  }
}

5589 5590 5591 5592
/*
 * Entry point for a query on exactly one table.
 *
 * If results remain from a previous invocation they are returned first
 * (either by continuing the fill procedure or by copying pending window
 * results). Otherwise the query is dispatched to the interval, fixed-output
 * or multi-output processor, and the elapsed time is added to the summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  if (queryHasRemainResForTableQuery(env)) {
    if (query->fillType != TSDB_FILL_NONE) {
      /*
       * Some results have not been returned yet due to result interpolation,
       * so keep filling here instead of launching the retrieve procedure again.
       */
      int32_t numOfFilled = 0;
      query->rec.rows = doFillGapsInResults(env, (tFilePage **)query->sdata, &numOfFilled);

      if (query->rec.rows > 0) {
        limitResults(env);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, query->rec.rows, query->rec.total);
      return;
    }

    query->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (env->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &env->windowResInfo);
      clearFirstNTimeWindow(env, pQInfo->groupIndex);

      if (query->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);

        // no window result remains
        if (env->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, query->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  query->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray *         firstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *tableInfo = taosArrayGetP(firstGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(query) || env->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, tableInfo);
  } else if (isFixedOutputQuery(env)) {
    tableFixedOutputProcess(pQInfo, tableInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(query->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, tableInfo);
  }

  // record the total elapsed time
  env->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5654
/*
 * Entry point for a query over multiple tables.
 *
 * Interval queries, and fixed-output queries that are neither point
 * interpolation queries nor group-by-normal-column queries, go through
 * multiTableQueryProcess; everything else is processed table by table via
 * sequentialTableProcess. The elapsed time is added to the summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  query->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(query) ||
      (isFixedOutputQuery(env) && (!isPointInterpoQuery(query)) && (!env->groupbyNormalCol));

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((query->checkBuffer == 1 && query->interval.interval == 0) || isPointInterpoQuery(query) ||
            isFirstLastRowQuery(query) || env->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5675
/*
 * Find the position of the column referenced by an expression.
 *
 * - tag column: TSDB_TBNAME_COLUMN_INDEX for the table name pseudo column,
 *   otherwise the index of the matching column id in pTagCols;
 * - user-defined column: TSDB_UD_COLUMN_INDEX;
 * - any other column: the index of the matching column id in pQueryMsg->colList.
 *
 * A column id that cannot be found trips assert(0); -1 is returned after it.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t tagIdx = 0; tagIdx < pQueryMsg->numOfTags; ++tagIdx) {
      if (pTagCols[tagIdx].colId == pExprMsg->colInfo.colId) {
        return tagIdx;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t colIdx = 0; colIdx < pQueryMsg->numOfCols; ++colIdx) {
      if (pQueryMsg->colList[colIdx].colId == pExprMsg->colInfo.colId) {
        return colIdx;
      }
    }
  }

  // the referenced column does not exist in the source
  assert(0);
  return -1;
}

5706 5707 5708
/*
 * Check that the column referenced by an expression resolves to an index that
 * is below the number of columns or below the number of tags of the query
 * message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  const int32_t srcIndex = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (srcIndex < pQueryMsg->numOfCols) {
    return true;
  }

  return srcIndex < pQueryMsg->numOfTags;
}

5711
/*
 * Sanity-check the scalar fields of a query message.
 *
 * Returns false (after logging the offending value) when the interval is
 * negative, the number of tables is not positive, the number of group-by
 * columns is negative, or the number of output columns is outside
 * [1, TSDB_MAX_COLUMNS]; returns true otherwise.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    // every check passed
    return true;
  }

  return false;
}

/*
 * Validate the number of source columns and tags referenced by a query message.
 *
 * Returns false when either count is negative or when their total exceeds
 * TSDB_MAX_COLUMNS. When the message carries neither columns nor tags, only
 * expressions that need no source column are accepted: TSDB_FUNC_TAGPRJ, and
 * TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT applied to the table name pseudo column.
 *
 * @param pQueryMsg query message whose numOfCols/numOfTags/numOfOutput are checked
 * @param pExprMsg  array of pQueryMsg->numOfOutput expression descriptors
 * @return true if the message passes the checks, false otherwise
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  /*
   * Both counts are taken from the received message. Each one is range-checked
   * on its own BEFORE they are added: summing first could overflow the signed
   * addition for a huge count, and the wrapped (negative) total would slip
   * through the upper-bound check.
   */
  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 ||
      pQueryMsg->numOfCols > TSDB_MAX_COLUMNS || pQueryMsg->numOfTags > TSDB_MAX_COLUMNS ||
      pQueryMsg->numOfCols + pQueryMsg->numOfTags > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  // safe now: both operands are within [0, TSDB_MAX_COLUMNS]
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;
  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: every expression must be able to work without one
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
      continue;
    }

    return false;
  }

  return true;
}

5757
/*
 * Build the table id list from the serialized STableIdInfo records found at
 * pMsg.
 *
 * Each record has its tid/uid/key fields byte-swapped in place inside the
 * message buffer and is then appended to a newly created array stored in
 * *pTableIdList.
 *
 * @return the position in the message right after the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *record = (STableIdInfo *)pMsg;
  for (int32_t n = 0; n < pQueryMsg->numOfTables; ++n, ++record) {
    record->tid = htonl(record->tid);
    record->uid = htobe64(record->uid);
    record->key = htobe64(record->key);

    taosArrayPush(*pTableIdList, record);
  }

  return (char *)record;
}
5775

5776
/**
H
hjxilinx 已提交
5777
 * pQueryMsg->head has been converted before this function is called.
5778
 *
H
hjxilinx 已提交
5779
 * @param pQueryMsg
5780 5781 5782 5783
 * @param pTableIdList
 * @param pExpr
 * @return
 */
H
Haojun Liao 已提交
5784
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr, SSqlFuncMsg ***pSecStageExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5785
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5786 5787
  int32_t code = TSDB_CODE_SUCCESS;

5788 5789 5790 5791
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5792 5793 5794 5795 5796 5797
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5798 5799
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5800

5801 5802
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5803
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5804
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5805 5806

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5807
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5808
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5809 5810 5811
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5812
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5813
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5814
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5815
  pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput);
5816

5817
  // query msg safety check
5818
  if (!validateQueryMsg(pQueryMsg)) {
5819 5820
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5821 5822
  }

H
hjxilinx 已提交
5823 5824
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5825 5826
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5827
    pColInfo->colId = htons(pColInfo->colId);
5828
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5829 5830
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5831

H
hjxilinx 已提交
5832
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5833

H
hjxilinx 已提交
5834
    int32_t numOfFilters = pColInfo->numOfFilters;
5835
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5836
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5837 5838 5839 5840
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5841 5842 5843
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5844
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5845

5846 5847
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5848 5849 5850

      pMsg += sizeof(SColumnFilterInfo);

5851 5852
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5853

5854
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5855 5856 5857 5858 5859
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5860
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5861
        pMsg += (pColFilter->len + 1);
5862
      } else {
5863 5864
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5865 5866
      }

5867 5868
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5869 5870 5871
    }
  }

5872
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5873 5874 5875 5876 5877
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5878
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5879

5880
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5881
    (*pExpr)[i] = pExprMsg;
5882

5883
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5884
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
H
Haojun Liao 已提交
5885 5886 5887 5888
    pExprMsg->colInfo.flag  = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId    = htons(pExprMsg->functionId);
    pExprMsg->numOfParams   = htons(pExprMsg->numOfParams);
    pExprMsg->resColId      = htons(pExprMsg->resColId);
5889

5890
    pMsg += sizeof(SSqlFuncMsg);
5891 5892

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5893
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5894 5895 5896 5897
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5898
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5899 5900 5901 5902 5903
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5904 5905
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5906
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5907 5908
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5909 5910
      }
    } else {
5911
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5912
//        return TSDB_CODE_QRY_INVALID_MSG;
5913
//      }
5914 5915
    }

5916
    pExprMsg = (SSqlFuncMsg *)pMsg;
5917
  }
5918

5919
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5920
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5921
    goto _cleanup;
5922
  }
5923

H
Haojun Liao 已提交
5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966
  if (pQueryMsg->secondStageOutput) {
    pExprMsg = (SSqlFuncMsg *)pMsg;
    *pSecStageExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES);
    
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      (*pSecStageExpr)[i] = pExprMsg;

      pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
      pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
      pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
      pExprMsg->functionId = htons(pExprMsg->functionId);
      pExprMsg->numOfParams = htons(pExprMsg->numOfParams);

      pMsg += sizeof(SSqlFuncMsg);

      for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
        pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
        pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

        if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
          pExprMsg->arg[j].argValue.pz = pMsg;
          pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
        } else {
          pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
        }
      }

      int16_t functionId = pExprMsg->functionId;
      if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
        if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
          code = TSDB_CODE_QRY_INVALID_MSG;
          goto _cleanup;
        }
      } else {
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_QRY_INVALID_MSG;
//      }
      }

      pExprMsg = (SSqlFuncMsg *)pMsg;
    }
  }

H
hjxilinx 已提交
5967
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5968

H
hjxilinx 已提交
5969
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5970
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5971 5972 5973 5974
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5975 5976 5977

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5978
      pMsg += sizeof((*groupbyCols)[i].colId);
5979 5980

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5981 5982
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5983
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5984 5985 5986 5987 5988
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5989

H
hjxilinx 已提交
5990 5991
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5992 5993
  }

5994 5995
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5996
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5997 5998

    int64_t *v = (int64_t *)pMsg;
5999
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6000 6001
      v[i] = htobe64(v[i]);
    }
6002

6003
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
6004
  }
6005

6006 6007
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6008 6009 6010 6011 6012
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

6013 6014
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
6015

6016 6017 6018 6019
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
6020

6021
      (*tagCols)[i] = *pTagCol;
6022
      pMsg += sizeof(SColumnInfo);
6023
    }
H
hjxilinx 已提交
6024
  }
6025

6026 6027 6028
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
6029 6030 6031 6032 6033 6034

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
6035 6036 6037
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
6038

weixin_48148422's avatar
weixin_48148422 已提交
6039
  if (*pMsg != 0) {
6040
    size_t len = strlen(pMsg) + 1;
6041

6042
    *tbnameCond = malloc(len);
6043 6044 6045 6046 6047
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
6048
    strcpy(*tbnameCond, pMsg);
6049
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
6050
  }
6051

6052
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
6053 6054
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
6055
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
6056
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
6057 6058

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
6059 6060

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
6061
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
6062 6063
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
6064 6065 6066 6067
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
6068 6069

  return code;
6070 6071
}

H
Haojun Liao 已提交
6072 6073
/*
 * Build the expression tree of an arithmetic output column from its serialized form.
 *
 * The serialized expression is read from the first argument of pArithExprInfo->base
 * (arg[0].argValue.pz, arg[0].argBytes bytes) and passed to exprTreeFromBinary().
 * On success the resulting tree is stored in pArithExprInfo->pExpr.
 *
 * Returns TSDB_CODE_SUCCESS on success, the code caught by the TRY/CATCH block if the
 * parser raised one, or TSDB_CODE_QRY_APP_ERROR if no tree was produced.
 */
static int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary", pQueryMsg);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  // the parser finished without raising an error, yet produced nothing
  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
6093
static int32_t createQueryFuncExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
6094 6095
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
6096
  int32_t code = TSDB_CODE_SUCCESS;
6097

H
Haojun Liao 已提交
6098
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
6099
  if (pExprs == NULL) {
6100
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
6101 6102 6103 6104 6105
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

H
Haojun Liao 已提交
6106
  for (int32_t i = 0; i < numOfOutput; ++i) {
6107
    pExprs[i].base = *pExprMsg[i];
6108
    pExprs[i].bytes = 0;
6109 6110 6111 6112

    int16_t type = 0;
    int16_t bytes = 0;

6113
    // parse the arithmetic expression
6114
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
6115
      code = buildArithmeticExprFromMsg(&pExprs[i], pQueryMsg);
6116

6117
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6118
        tfree(pExprs);
6119
        return code;
6120 6121
      }

6122
      type  = TSDB_DATA_TYPE_DOUBLE;
6123
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
6124
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
6125
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6126
      type = s.type;
H
Haojun Liao 已提交
6127
      bytes = s.bytes;
6128 6129
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
6130 6131
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6132 6133
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6134 6135 6136 6137 6138

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6139
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
6140
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6141

dengyihao's avatar
dengyihao 已提交
6142
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6143 6144 6145 6146
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6147
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6148

H
Haojun Liao 已提交
6149 6150 6151
        type  = s.type;
        bytes = s.bytes;
      }
6152 6153
    }

S
TD-1057  
Shengliang Guan 已提交
6154
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6155
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6156
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6157
      tfree(pExprs);
6158
      return TSDB_CODE_QRY_INVALID_MSG;
6159 6160
    }

6161
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6162
      tagLen += pExprs[i].bytes;
6163
    }
6164
    assert(isValidDataType(pExprs[i].type));
6165 6166 6167
  }

  // TODO refactor
H
Haojun Liao 已提交
6168
  for (int32_t i = 0; i < numOfOutput; ++i) {
6169 6170
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6171

6172
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6173
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6174 6175 6176 6177 6178 6179 6180 6181 6182
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6183 6184 6185
    }
  }

6186
  *pExprInfo = pExprs;
6187 6188 6189
  return TSDB_CODE_SUCCESS;
}

6190
/*
 * Create the group-by descriptor from the query message.
 *
 * pQueryMsg : the query message; numOfGroupCols, orderType and orderByIdx are read from it
 * pColIndex : array of pQueryMsg->numOfGroupCols group-by column descriptors, copied into the result
 * code      : [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails; untouched otherwise
 *
 * Returns NULL when the query has no group-by columns or when an allocation fails
 * (the two cases are told apart through *code); otherwise a newly allocated descriptor.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure the same way as above
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6214
/*
 * Build pQuery->pFilterInfo from the per-column filters stored in pQuery->colList.
 *
 * Every column with at least one filter gets one SSingleColumnFilterInfo entry, and every
 * filter of that column gets the filter function matching its lower/upper relational operators.
 *
 * pQInfo : only used as an identifier in log messages
 * pQuery : the query whose colList is read and whose numOfFilterCols/pFilterInfo are written
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY, or TSDB_CODE_QRY_INVALID_MSG when a
 * filter has no valid operator, an unsupported data type, or an invalid operator combination.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array, so that cleanup code which
    // iterates numOfFilterCols entries of pFilterInfo does not touch a NULL array
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // both bounds are present: pick the range filter matching the bound inclusiveness
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

6307
/*
 * Refresh colInfo.colIndex of every first-stage output expression (pQuery->pExpr1) so that it
 * refers to the position of the column inside pQuery->colList (normal columns) or
 * pQuery->tagColList (tag columns). The lookup is done by column id.
 *
 * Arithmetic expressions and user-defined constant columns are left untouched.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before reading through it
  assert(pQuery != NULL && pQuery->pExpr1 != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pExpr1[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is not part of tagColList, so it may legitimately be missing
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6344 6345
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6346 6347 6348
/*
 * Choose the capacity (in rows) of the result buffer and the threshold derived from it,
 * and store both in pQuery->rec.
 *
 * Projection queries get as many rows as fit into RESULT_MSG_MIN_SIZE bytes, but never fewer
 * than RESULT_MSG_MIN_ROWS. All other queries use a fixed capacity of 4096 rows. The threshold
 * is RESULT_THRESHOLD_RATIO of the capacity.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  if (isProjQuery(pQuery)) {
    // a zero row size would divide by zero; fall through to the minimum number of rows instead
    int32_t numOfRes = (pQuery->rowSize != 0)? (RESULT_MSG_MIN_SIZE / pQuery->rowSize) : 0;
    if (numOfRes < RESULT_MSG_MIN_ROWS) {
      numOfRes = RESULT_MSG_MIN_ROWS;
    }

    pQuery->rec.capacity  = numOfRes;
    pQuery->rec.threshold = (int32_t)(numOfRes * RESULT_THRESHOLD_RATIO);
  } else {  // in case of non-prj query, a smaller output buffer will be used.
    pQuery->rec.capacity = 4096;
    pQuery->rec.threshold = (int32_t)(pQuery->rec.capacity * RESULT_THRESHOLD_RATIO);
  }
}

6365
/*
 * Allocate and populate a SQInfo (and the SQuery it owns) for the given query message.
 *
 * pQueryMsg       : the decoded query message
 * pGroupbyExpr    : group-by descriptor, may be NULL
 * pExprs          : first-stage output expressions (pQueryMsg->numOfOutput entries)
 * pSecExprs       : second-stage output expressions, may be NULL
 * pTableGroupInfo : the tables to query, grouped; copied by value into the new SQInfo
 * pTagCols        : tag column descriptors
 * stableQuery     : forwarded to changeExecuteScanOrder()
 *
 * Returns the new SQInfo, or NULL on failure. On the failure paths taken before the SQuery
 * is attached, pGroupbyExpr, pTagCols and pExprs are released here; on the later paths they
 * are released through freeQInfo().
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pExpr1          = pExprs;
  pQuery->pExpr2          = pSecExprs;
  pQuery->numOfExpr2      = pQueryMsg->secondStageOutput;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // size the array by its own element type
  pQuery->colList = calloc(numOfCols, sizeof(*pQuery->colList));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    // TODO refactor
    int16_t bytes = 0;
    if (pQuery->pExpr2 == NULL || col >= pQuery->numOfExpr2) {
      // no second-stage expression exists at this index
      bytes = pExprs[col].bytes;
    } else {
      bytes = MAX(pQuery->pExpr2[col].bytes, pExprs[col].bytes);
    }

    size_t size = (size_t)((pQuery->rec.capacity + 1) * bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  pQInfo->runtimeEnv.pool = initResultRowPool(getWindowResultSize(&pQInfo->runtimeEnv));

  // one contiguous buffer that holds the STableQueryInfo of every table
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  int32_t index = 0;

  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      // slot of this table inside the contiguous pBuf
      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);

      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // pQInfo was never created, so the table group has not been handed over to it
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // pQuery was never attached, so release the inputs it would have owned
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6558
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6559 6560 6561 6562
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6563

H
hjxilinx 已提交
6564 6565 6566 6567
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6568
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6569 6570 6571
  return (sig == (uint64_t)pQInfo);
}

6572
/*
 * Second-phase initialization of a SQInfo created by createQInfoImpl().
 *
 * pQueryMsg : the query message; tsLen/tsOffset/tsNumOfBlocks/tsOrder describe an optional
 *             block of compressed timestamps embedded in the message
 * tsdb      : storage handle, used for the precision and forwarded to doInitQInfo()
 * vgId      : vnode id, forwarded to the TS buffer and to doInitQInfo()
 * pQInfo    : the query handle to initialize
 * isSTable  : forwarded to doInitQInfo()
 *
 * Returns TSDB_CODE_SUCCESS (also when the query is completed immediately because the time
 * range is empty or no table qualifies), or the error of doInitQInfo(); in the latter case
 * pQInfo has been released with freeQInfo().
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The TS buffer is created only after the early exits above: those paths neither store
  // nor destroy it, so creating it beforehand would lose the only reference to it.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);

    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6617
/*
 * Release an array of column filters.
 *
 * For every entry whose filterstr flag is set, the buffer referenced by pz is freed,
 * then the array itself is freed. Nothing happens when pFilter is NULL or numOfFilters is 0.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    int32_t idx = 0;
    while (idx < numOfFilters) {
      SColumnFilterInfo *pElem = &pFilter[idx];
      if (pElem->filterstr) {
        free((void*)(pElem->pz));
      }

      idx += 1;
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6631 6632
/*
 * Destroy every STableQueryInfo referenced by the group list, then the per-group arrays,
 * the group list itself and the lookup map. The structure is left in an empty state
 * (NULL list, NULL map, zero tables).
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  if (pTableqinfoGroupInfo->pGroupList != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pTableqinfoGroupInfo->pGroupList);

    int32_t g = 0;
    while (g < groupCount) {
      SArray *pGroup = taosArrayGetP(pTableqinfoGroupInfo->pGroupList, g);

      size_t tableCount = taosArrayGetSize(pGroup);
      for(int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pGroup, t);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pGroup);
      g += 1;
    }
  }

  taosArrayDestroy(pTableqinfoGroupInfo->pGroupList);
  pTableqinfoGroupInfo->pGroupList = NULL;

  taosHashCleanup(pTableqinfoGroupInfo->map);
  pTableqinfoGroupInfo->map = NULL;

  pTableqinfoGroupInfo->numOfTables = 0;
}

H
Haojun Liao 已提交
6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670
/*
 * Release an array of numOfExpr output expressions together with the expression
 * trees attached to them. Always returns NULL, so the caller can reset its pointer
 * in the same statement.
 */
static void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr) {
  if (pExprInfo != NULL) {
    int32_t idx = 0;
    while (idx < numOfExpr) {
      if (pExprInfo[idx].pExpr != NULL) {
        tExprNodeDestroy(pExprInfo[idx].pExpr, NULL);
      }

      idx += 1;
    }

    tfree(pExprInfo);
  } else {
    // a missing array must come with a zero count
    assert(numOfExpr == 0);
  }

  return NULL;
}

H
hjxilinx 已提交
6671 6672 6673 6674
/*
 * Release a SQInfo and everything reachable from it: the runtime environment, the SQuery
 * with its result buffers, filters, expressions, tag/column lists and group-by descriptor,
 * the per-table query info, the table group and the table id array.
 *
 * Does nothing when pQInfo is NULL or its signature does not match (see isValidQInfo()).
 * The signature is cleared before the memory is released.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        tfree(pQuery->sdata[col]);
      }
      tfree(pQuery->sdata);
    }

    tfree(pQuery->fillVal);

    // numOfFilterCols may be non-zero although the filter array was never allocated
    // (allocation failure while the filters were being built), so check the array first
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          tfree(pColFilter->pFilters);
        }
      }
    }

    pQuery->pExpr1 = destroyQueryFuncExpr(pQuery->pExpr1, pQuery->numOfOutput);
    pQuery->pExpr2 = destroyQueryFuncExpr(pQuery->pExpr2, pQuery->numOfExpr2);

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      tfree(pQuery->colList);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle, so that a later isValidQInfo() on this address fails
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
6737
/*
 * Compute the number of payload bytes for the current batch of results.
 *
 * For a ts-comp query with a positive row count, the payload is the temporary file
 * named by pQuery->sdata[0]->data: *numOfRows is overwritten with the file size and
 * that size is returned (0 if the file cannot be stat'ed).
 * TODO handle the case that the file is too large to send back one time
 *
 * For any other query, the size is rowSize * (*numOfRows).
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // ordinary result set: fixed row width times the number of rows
  if (!(isTSCompQuery(pQuery) && (*numOfRows) > 0)) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6758

H
hjxilinx 已提交
6759 6760 6761
/*
 * Copy the current batch of results into the response buffer `data`.
 *
 * - ts-comp query: the content of the temporary file named by pQuery->sdata[0]->data
 *   is read into `data`, then the file is closed and unlinked. If the query status
 *   already carries QUERY_COMPLETED, it is switched to QUERY_OVER.
 * - any other query: rows are copied by doCopyQueryResultToMsg().
 *
 * Afterwards rec.total is advanced by rec.rows and the query is marked QUERY_OVER
 * when a positive limit has been reached exactly.
 *
 * @return always TSDB_CODE_SUCCESS; I/O failures are only logged.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // lseek() reports failure as -1, so the result is kept signed
      int64_t s = lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, s);
      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        // read() returns -1 on error; a signed result keeps that distinguishable from a byte count
        ssize_t sz = read(fd, data, (uint32_t)s);
        if (sz < 0 || (int64_t)sz < s) {  // todo return the error code to client
          qError("QInfo:%p failed to read ts-comp data, path:%s, expected:%"PRId64", read:%"PRId64, pQInfo,
                 pQuery->sdata[0]->data, s, (int64_t)sz);
          assert(0);
        }
      } else {
        qError("QInfo:%p failed to seek in ts-comp data file, path:%s, reason:%s", pQInfo,
               pQuery->sdata[0]->data, strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6811 6812 6813 6814 6815 6816 6817
/*
 * Query-handle manager, identified by vgId.
 * Allocated by qOpenQueryMgmt() and released by qCleanupQueryMgmt().
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool, created by taosCacheInit(); set to NULL during cleanup
  int32_t         vgId;           // id passed to qOpenQueryMgmt(); used in log messages
  bool            closed;         // set by qQueryMgmtNotifyClosed(); register/acquire fail once it is true
  pthread_mutex_t lock;           // initialized on open and destroyed on cleanup
} SQueryMgmt;

6818
/*
 * Build a query handle from a query message.
 *
 * Steps: decode the message (convertQueryMsg), build the output expressions and the
 * group-by expression, resolve the table group from tsdb according to the query type,
 * reserve query buffer (checkForQueryBuf), create the QInfo and initialize it.
 *
 * @param tsdb      storage handle, must not be NULL
 * @param vgId      id forwarded to initQInfo()
 * @param pQueryMsg query message, must not be NULL
 * @param pQInfo    out: the created handle; set to NULL whenever the returned code is
 *                  not TSDB_CODE_SUCCESS
 * @return TSDB_CODE_SUCCESS or the error code of the failing step
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  // temporaries released in the cleanup section below
  SArray          *pTableIdList   = NULL;
  char            *tagCond        = NULL;
  char            *tbnameCond     = NULL;
  SSqlFuncMsg    **pExprMsg       = NULL;
  SSqlFuncMsg    **pSecExprMsg    = NULL;
  SExprInfo       *pExprs         = NULL;
  SExprInfo       *pSecExprs      = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  bool             isSTableQuery  = false;
  STableGroupInfo  tableGroupInfo = {0};
  int64_t          startUs        = 0;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &pSecExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
  }

  // first-stage output expressions
  code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->numOfOutput, &pExprs, pExprMsg, pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // optional second-stage output expressions
  if (pSecExprMsg != NULL) {
    code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &pSecExprs, pSecExprMsg, pTagColumnInfo);
    if (code != TSDB_CODE_SUCCESS) {
      goto _cleanup;
    }
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if (code != TSDB_CODE_SUCCESS || (pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0)) {
    goto _cleanup;
  }

  startUs = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *pIdInfo = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, pIdInfo->uid, pIdInfo->tid);
    code = tsdbGetOneTableGroup(tsdb, pIdInfo->uid, pQueryMsg->window.skey, &tableGroupInfo);
    if (code != TSDB_CODE_SUCCESS) {
      goto _cleanup;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      // the client supplied an explicit list of tables
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _cleanup;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    } else {
      // also note there's possibility that only one table in the super table
      STableIdInfo *pIdInfo = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, pIdInfo->uid, pIdInfo->tid);
      code = tsdbQuerySTableByTagCond(tsdb, pIdInfo->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _cleanup;
      }
    }

    int64_t elapsedUs = taosGetTimestampUs() - startUs;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, elapsedUs);
  } else {
    assert(0);
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    goto _cleanup;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, pSecExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);

  // these pointers were handed to createQInfoImpl(); clear them so the cleanup section skips them
  pExprs         = NULL;
  pSecExprs      = NULL;
  pGroupbyExpr   = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_cleanup:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);
  free(pExprs);
  free(pSecExprs);

  free(pExprMsg);
  free(pSecExprMsg);

  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* pCol = pQueryMsg->colList + i;
    freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6966
/*
 * Release a query handle: log completion, print the query cost summary and free the
 * QInfo. Does nothing when the handle fails isValidQInfo().
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

6977 6978 6979 6980 6981 6982 6983 6984
/*
 * Mark the result of the current execution round as ready and give up ownership of
 * the handle.
 *
 * @return true when a response context (pQInfo->rspContext) is set at the time the
 *         result becomes ready, false otherwise.
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // clear qhandle owner, it must be in the secure area. other thread may run ahead before current, after it is
  // put into task to be executed.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  // wake up a retriever blocked in tsem_wait(&pQInfo->ready)
  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6996
/*
 * Run one execution round of the query behind `qinfo`.
 *
 * The calling thread first claims the handle by atomically swapping its thread id
 * into pQInfo->owner. If another thread already owns it, pQInfo->code is set to
 * TSDB_CODE_QRY_IN_EXEC and false is returned. Every other exit path goes through
 * doBuildResCheck(), which publishes the result and releases ownership.
 *
 * @return false when the handle is being executed by another thread, otherwise the
 *         value returned by doBuildResCheck().
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  // only one thread may execute a given handle at a time
  int64_t self      = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t ret = setjmp(pRuntimeEnv->env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // dispatch to the implementation matching the query kind
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

7052
/*
 * Check whether the result of the query behind `qinfo` can be built now.
 *
 * With _NON_BLOCKING_RETRIEVE: *buildRes is true when the result is already marked
 * ready; otherwise pRspContext is stored in the handle and *buildRes is false.
 * Without it: the call blocks on the handle's `ready` semaphore and then sets
 * *buildRes to true.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise pQInfo->code.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  int32_t code = TSDB_CODE_SUCCESS;

#if _NON_BLOCKING_RETRIEVE
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);
  assert(pQInfo->rspContext == NULL);

  if (pQInfo->dataReady != QUERY_RESULT_READY) {
    // not ready yet: remember the response context
    *buildRes = false;
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  } else {
    *buildRes = true;
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  }

  code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);
#else
  // block until the executing thread posts the semaphore
  tsem_wait(&pQInfo->ready);
  *buildRes = true;
  code = pQInfo->code;
#endif

  return code;
}
7095

7096
/*
 * Allocate and fill the retrieve response for the current batch of results.
 *
 * @param qinfo        query handle
 * @param pRsp         out: response allocated with rpcMallocCont()
 * @param contLen      out: total length of the response in bytes
 * @param continueExec out: false when the query is killed or over, true when more
 *                     results remain
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the response cannot be allocated,
 *         otherwise pQInfo->code
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload = result rows + one int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t bodyLen = getResultSize(pQInfo, &pQuery->rec.rows);
  bodyLen += sizeof(int32_t);
  bodyLen += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(bodyLen + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *pResult = *pRsp;
  pResult->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    pResult->offset = htobe64(pQuery->limit.offset);
  } else {
    pResult->offset = 0;
  }
  pResult->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);

  pResult->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, pResult->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    // here current thread hold the refcount, so it is safe to free tsdbQueryHandle.
    *continueExec = false;
    pResult->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
7150

7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161
/*
 * Tell whether the query behind `qinfo` has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1 when the
 *         query is killed or its status carries QUERY_OVER, else 0.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return 1;
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  return Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
7162
/*
 * Flag the query as killed and block until no thread owns the handle any more.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise
 *         TSDB_CODE_SUCCESS once pQInfo->owner has dropped to 0.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread being stopped.
  // Once the query is stopped, the owner of qHandle will be cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195
/*
 * Write one tag value into the result buffer.
 *
 * @param output destination in the result buffer
 * @param val    source value; NULL makes the destination a typed null
 * @param type   column data type
 * @param bytes  number of bytes copied for fixed-width types
 *
 * For BINARY/NCHAR columns the copy length is varDataTLen(val); for every other
 * type exactly `bytes` bytes are copied.
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7196 7197 7198
/*
 * Build the result of a query that only needs table meta data (tags, table name,
 * table id), i.e. without scanning any data block.
 *
 * Depending on the function id of the first output expression:
 *  - TSDB_FUNC_TID_TAG: one var-length record per table holding a zeroed int16_t,
 *    uid, tid, vgId and the tag value (or table name);
 *  - TSDB_FUNC_COUNT:   "count(tbname)", a single int64_t holding the number of tables;
 *  - otherwise:         one row per table with the requested tags / table name,
 *    honouring limit and offset.
 *
 * On return pQuery->rec.rows holds the number of rows produced and the query status
 * is QUERY_COMPLETED. Nothing is done when there is no table group.
 *
 * The scalar fields are stored with memcpy(): the destination is a byte offset inside
 * a char buffer and is not guaranteed to be aligned for int64_t/int32_t stores.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pExpr1[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pExpr1[0];
    int32_t rsize = pExprInfo->bytes;

    // prefer the width/type recorded in the tag column list when the column is found there
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      int16_t zero = 0;
      memcpy(output, &zero, sizeof(zero));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    int64_t numOfTables = num;
    memcpy(pQuery->sdata[0]->data, &numOfTables, sizeof(numOfTables));

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // a non-negative limit smaller than the buffer capacity caps the number of rows
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pExpr1;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7329
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7330 7331 7332 7333
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7334
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7335 7336
}

H
Haojun Liao 已提交
7337
/*
 * Reserve query buffer for `numOfTables` tables from the global tsQueryBufferSize.
 *
 *  - tsQueryBufferSize < 0: no limit is enforced, always succeeds;
 *  - tsQueryBufferSize == 0: query processing is disabled, always fails;
 *  - otherwise the estimated size is subtracted atomically (compare-and-swap retry loop).
 *
 * @return TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  if (!(tsQueryBufferSize > 0)) {
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  for (;;) {
    int64_t available = tsQueryBufferSize;
    int64_t remain = available - required;
    if (remain < 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry when another thread changed the budget in between
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, available, remain) == available) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

H
Haojun Liao 已提交
7360
/*
 * Give back the query buffer estimated for `numOfTables` tables.
 * Nothing is returned when tsQueryBufferSize is zero or negative.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize > 0) {
    int64_t released = getQuerySupportBufSize(numOfTables);

    // restore value is not enough buffer available
    atomic_add_fetch_64(&tsQueryBufferSize, released);
  }
}

7371 7372 7373 7374 7375 7376 7377
/*
 * Return the response context currently stored in the query handle
 * (pQInfo->rspContext). The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pHandle = (SQInfo*) qinfo;
  assert(pHandle != NULL);

  void* pContext = pHandle->rspContext;
  return pContext;
}

7378 7379 7380 7381 7382 7383 7384
/*
 * Free callback registered with the query-handle cache: kills the query stored in
 * the cache entry and then destroys it. `qhandle` points at the stored handle;
 * a NULL entry or a NULL handle is ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle != NULL && *ppHandle != NULL) {
    qKillQuery(*ppHandle);
    qDestroyQueryInfo(*ppHandle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7389
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7390 7391 7392 7393

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7394
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7395 7396 7397 7398
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7399

S
TD-1530  
Shengliang Guan 已提交
7400
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7401 7402 7403 7404
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7405 7406

  qDebug("vgId:%d, open querymgmt success", vgId);
7407
  return pQueryMgmt;
7408 7409
}

H
Haojun Liao 已提交
7410
/*
 * Callback handed to taosCacheRefresh() when the manager is closing: `handle`
 * points at a stored query handle, which is killed via qKillQuery().
 */
static void queryMgmtKillQueryFn(void* handle) {
  void* pQInfo = *(void**)handle;
  qKillQuery(pQInfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7416 7417 7418 7419 7420 7421 7422
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7423
//  pthread_mutex_lock(&pQueryMgmt->lock);
7424
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7425
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7426

H
Haojun Liao 已提交
7427
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444
}

/*
 * Destroy a manager that has already been marked closed: the handle pool is detached
 * and cleaned up, the mutex destroyed and the manager freed. A NULL manager is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  assert(pQueryMgmt->closed);

  // remember the id for the final log line; the manager is gone by then
  int32_t vgId = pQueryMgmt->vgId;

  // detach the pool from the manager before tearing it down
  SCacheObj* pPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  tfree(pQueryMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}

7450
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7451
  if (pMgmt == NULL) {
7452
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7453 7454 7455
    return NULL;
  }

7456
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7457

7458 7459
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7460
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7461
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7462 7463 7464
    return NULL;
  }

H
Haojun Liao 已提交
7465
//  pthread_mutex_lock(&pQueryMgmt->lock);
7466
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7467
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7468
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7469
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7470 7471
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7472 7473
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7474
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7475 7476 7477 7478 7479

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7480
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7481 7482
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7483 7484 7485 7486 7487 7488 7489
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7490 7491 7492
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7493 7494
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7495
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7496
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7497 7498 7499 7500 7501 7502
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7503
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7504 7505 7506 7507 7508
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7509
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7510 7511 7512
  return 0;
}

7513