qExecutor.c 253.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
// Evaluates to (1u << 12) - 1 == 4095.
// NOTE(review): presumably the row cap for one result-buffer page, judging by the name — confirm at the use sites.
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
37
// True when at least one bit of mask s is set in status word p.
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
// True when the direction factor derived from the query's order equals the forward step.
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

// Test / set the scanFlag field of a runtime environment.
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

// Recover the enclosing SQInfo from a pointer to its runtimeEnv member.
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

// Position reached from (query)->pos after index steps of size step.
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
// Flip n between TSDB_ORDER_ASC and TSDB_ORDER_DESC in place (n is evaluated twice).
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

// Zero-valued SDataBlockInfo compound literal.
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey pair of _src into _dst. Both arguments are lvalue
 * expressions (not pointers) of a type with skey and ekey members.
 *
 * The arguments are parenthesized so expressions such as *p can be passed, and
 * the macro deliberately does NOT end in ';' so that
 *   if (c) TIME_WINDOW_COPY(a, b); else ...
 * parses as a single statement. Callers supply the trailing semicolon.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
/* Query status bits; the values are distinct single bits, so they can be OR-ed together. */
enum {
  // when query starts to execute, this status will set
  QUERY_NOT_COMPLETED = 0x1u,

  /* result output buffer is full, current query is paused.
   * this status only exists in group-by clause and diff/add/division/multiply/ query.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window, it is also denoted as query_completed
   */
  QUERY_COMPLETED = 0x4u,

  /* when the result is not completed return to client, this status will be
   * usually used in case of interval query with interpolation option
   */
  QUERY_OVER = 0x8u,
};
77 78

/* Result codes named after timestamp-join comparisons (timestamp equal / not equal, tag not equal). */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Compiled-out allocator wrappers that return NULL for some rand() outcomes
 * and otherwise forward to the real allocator. When enabled, the #defines at
 * the bottom redirect malloc/calloc/realloc in the rest of the file to them.
 */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();

  // fails when v is a multiple of 1000
  return (v % 1000 <= 0) ? NULL : malloc(__size);
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();

  // fails when v is a multiple of 1000
  return (v % 1000 <= 0) ? NULL : calloc(num, __size);
}

static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();

  // fails when v % 5 is 0 or 1
  return (v % 5 <= 1) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Advance the time window *tw to the next window in the query's scan direction.
 *
 * For interval units other than 'n' and 'y' the start key moves by
 * sliding * direction-factor and the end key becomes skey + interval - 1.
 *
 * For 'n' and 'y' the window is moved in calendar months ('y' counts as
 * 12 * interval months): the start key is reduced to seconds, broken down with
 * localtime_r(), shifted by the month count, and rebuilt with mktime(). The end
 * key is the start of the window that follows, minus one tick.
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the key to seconds: one division by 1000, and a second one for microsecond precision
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before multiplying: time_t/long may be 32-bit, where seconds * 1000 overflows
  tw->skey = (int64_t)mktime(&tm) * 1000L;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000L;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  tw->ekey -= 1;
}

173 174
// Mark the query as finished by moving tableIndex to numOfTables, and test for that state.
// The second macro name is misspelled ("STASBLE") but is kept as-is because callers use it.
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
175

H
hjxilinx 已提交
176
// todo move to utility
177
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
178

179
static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
180
static void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
181
static void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow);
182
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
183

184
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
185
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
186

187
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
188
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
189
static void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
190
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
191
static void buildTagQueryResult(SQInfo *pQInfo);
192

193
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
194
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
195 196
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
197

198
/*
 * Decide whether the row at elemPos passes every column filter of the query.
 *
 * For each filter column the row qualifies as soon as one of its filter
 * elements accepts the value (the elements are OR-ed). Null handling:
 *   - a null value is accepted only by an isNull_filter element; any other
 *     element is skipped without being called;
 *   - a non-null value is accepted at once by notNull_filter, and an
 *     isNull_filter element is skipped.
 * All other elements are evaluated through their fp callback.
 *
 * Returns true only when every filter column qualified. A column with no
 * accepting element, including one with zero elements, makes the result false.
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    // address of this row's value inside the column data
    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      bool isnull = isNull(pElem, pFilterInfo->info.type);
      if (isnull) {
        if (pFilterElem->fp == isNull_filter) {
          qualified = true;
          break;
        } else {
          continue;
        }
      } else {
        if (pFilterElem->fp == notNull_filter) {
          qualified = true;
          break;
        } else if (pFilterElem->fp == isNull_filter) {
          continue;
        }
      }

      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes among the output columns' result-cell infos.
 *
 * When hasMainOutput() reports true, TS / TAG / TAGPRJ columns are skipped
 * because they do not decide the number of output rows. Columns whose result
 * info is NULL are ignored. The result is never negative.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pExpr1[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

266 267 268 269 270
/*
 * The number of results needs to be updated after the offset value has been updated.
 *
 * Sets numOfRes of every output column's result info to the given value,
 * except for TS / TAG / TAGPRJ / TS_DUMMY columns. Each updated column must
 * currently hold more than numOfRes results (asserted).
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

H
Haojun Liao 已提交
286 287
/* Map a group index to its merge-result group id: 50000000 + groupIndex * 10000. */
static int32_t getMergeResultGroupId(int32_t groupIndex) {
  int32_t base = 50000000;
  return base + (groupIndex * 10000);
}

/*
 * Return true when the group-by expression contains at least one normal
 * (non-tag) column. A NULL expression or one with no group columns yields false.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      //make sure the normal column locates at the second position if tbname exists in group by clause
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression,
 * looked up by column id in pQuery->colList.
 *
 * Returns TSDB_DATA_TYPE_NULL when the expression has no normal column or the
 * column id is not present in the query's column list. pGroupbyExpr must not
 * be NULL (asserted).
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // sentinel used when no normal group-by column is found
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/*
 * Return true when the output list holds at least one TAG_DUMMY / TS_DUMMY
 * column together with at least one function whose aAggs status carries
 * TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pExpr1[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

358 359
/*
 * Return true when every output column is a PRJ or TAGPRJ function
 * (vacuously true when there are no output columns).
 */
bool isProjQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pExpr1[i].base.functionId;
    if (functId != TSDB_FUNC_PRJ && functId != TSDB_FUNC_TAGPRJ) {
      return false;
    }
  }

  return true;
}

H
Haojun Liao 已提交
369
/* Return true when the first output expression is the TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirst = &pQuery->pExpr1[0];
  return pFirst->base.functionId == TSDB_FUNC_TS_COMP;
}
370

371 372 373
/*
 * Apply the LIMIT clause to the rows of the current batch.
 *
 * When a positive limit is set and total + rows would exceed it, the batch is
 * truncated to the remaining quota, the query status is set to QUERY_COMPLETED
 * and true is returned. Otherwise nothing changes and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

/* Return true when any output column is a TOP or BOTTOM function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
403
/*
 * Return true when the query is a single-column TS_COMP query, or when any
 * output expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pExprInfo = &pQuery->pExpr1[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  } else {  // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo *pLocalExprInfo = &pQuery->pExpr1[idx];

      // ts_comp column required the tag value for join filter
      if (TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        return true;
      }
    }
  }

  return false;
}

421 422 423 424 425 426 427 428
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
429
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
430
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
431 432
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
433 434
  } else {
    *pColStatis = NULL;
435
  }
436

H
Haojun Liao 已提交
437
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
438 439 440
    return false;
  }

441 442 443
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
444

445 446 447
  return true;
}

H
Haojun Liao 已提交
448
/*
 * Look up, or create, the result row for the key (pData, bytes, uid).
 *
 * The key is assembled in pRuntimeEnv->keyBuf and searched in the result-row
 * hash table. On a hit, curIndex is set to the stored slot. On a miss:
 *   - outside the master scan no row is created and NULL is returned;
 *   - otherwise the pResult pointer array is grown when full, a new row is
 *     taken from the pool, initialised, appended, and its slot is recorded in
 *     the hash table.
 *
 * Errors are reported through longjmp(pRuntimeEnv->env, code):
 * TSDB_CODE_QRY_OUT_OF_MEMORY when growing the array or initialising the row
 * fails, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when size exceeds
 * MAX_INTERVAL_TIME_WINDOW.
 *
 * Returns the row at curIndex, or NULL as described above.
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);
  int32_t *p1 =
      (int32_t *)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // TODO refactor
    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCapacity = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // the factors above truncate to no growth for capacity 0 or 1; always gain at least
      // one slot so the store to pResult[size] below stays inside the array
      if (newCapacity <= pWindowResInfo->capacity) {
        newCapacity = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SResultRow **)t;

      // zero the newly added pointer slots
      int32_t inc = (int32_t)newCapacity - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, POINTER_BYTES * inc);

      pWindowResInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = getNewResultRow(pRuntimeEnv->pool);
    pWindowResInfo->pResult[pWindowResInfo->size] = pResult;
    int32_t ret = initResultRow(pResult);
    if (ret != TSDB_CODE_SUCCESS) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes),
                (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that contains timestamp ts.
 *
 * The starting candidate is the window of the current result row, or, when no
 * row is active yet (curIndex == -1), the window that begins at prevSKey. If
 * ts lies outside the candidate, the window is recomputed:
 *   - 'n' / 'y' units: truncate ts to the unit and add one interval;
 *   - other units: move the start key by whole multiples of the sliding step
 *     until the window covers ts.
 * Window end keys are inclusive, hence the "- 1" on every ekey computation.
 *
 * For ascending queries the end key is clipped to the query's end key. The
 * start key is never clipped because it is used as the window index value.
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // inclusive end key, consistent with the recomputation branch below
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // window starts after ts: step back by the smallest multiple of sliding that reaches ts
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // window ends before ts: step forward by the smallest multiple of sliding that covers ts
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

H
Haojun Liao 已提交
553
/*
 * Assign a (page, row) slot in the disk-based result buffer to a result row
 * that does not have one yet.
 *
 * If the row already has a page (pageId != -1) nothing is done. Otherwise the
 * last page of table tid is reused while it holds fewer than numOfRowsPerPage
 * rows; when the table has no page yet, or the last page is full, a new page
 * is requested. The row takes the next free row index of the chosen page.
 *
 * Returns 0 on success and -1 when no page could be obtained.
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, tid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    if (pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, tid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->rowId = (int32_t)(pData->num++);

    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

H
Haojun Liao 已提交
598
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, SDataBlockInfo* pBockInfo,
599
                                       STimeWindow *win, bool masterscan, bool* newWind) {
600 601
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
602

H
Haojun Liao 已提交
603 604
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, pBockInfo->uid);
  if (pResultRow == NULL) {
605 606 607
    *newWind = false;

    return masterscan? -1:0;
608
  }
609

610
  *newWind = true;
H
Haojun Liao 已提交
611

612
  // not assign result buffer yet, add new result buffer
H
Haojun Liao 已提交
613 614
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, pBockInfo->tid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
615
    if (ret != TSDB_CODE_SUCCESS) {
616 617 618
      return -1;
    }
  }
619

620
  // set time window for current result
H
Haojun Liao 已提交
621
  pResultRow->win = (*win);
H
Haojun Liao 已提交
622
  setResultRowOutputBufInitCtx(pRuntimeEnv, pResultRow);
623 624 625
  return TSDB_CODE_SUCCESS;
}

626
/* Return the closed flag of the result row at slot; slot must be in [0, size). */
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);
  return pWindowResInfo->pResult[slot]->closed;
}

H
Haojun Liao 已提交
631
/*
 * Count how many rows, starting at pos and moving in the scan direction, fall
 * inside a window that ends at ekey.
 *
 * Ascending: searchFn runs over pData[pos .. numOfRows-1]; its result is the
 * offset from pos, and one more row is counted when the row found equals ekey.
 * Descending: searchFn runs over pData[0 .. pos]; the step count is the
 * distance from the row found back to pos, plus one when that row equals ekey.
 *
 * A negative searchFn result leaves the count at 0, which trips the final
 * assert: callers must only ask when at least one row qualifies.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
662
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
663
  SQuery *pQuery = pRuntimeEnv->pQuery;
664 665 666 667 668 669 670
  if (pRuntimeEnv->scanFlag != MASTER_SCAN) {
    return pWindowResInfo->size;
  }

  // for group by normal column query, close time window and return.
  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    closeAllTimeWindow(pWindowResInfo);
671
    return pWindowResInfo->size;
672
  }
673

674
  // no qualified results exist, abort check
675
  int32_t numOfClosed = 0;
676

677
  if (pWindowResInfo->size == 0) {
678
    return pWindowResInfo->size;
679
  }
680

681
  // query completed
H
hjxilinx 已提交
682 683
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
684
    closeAllTimeWindow(pWindowResInfo);
685

686 687 688 689
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
690
    int64_t skey = TSKEY_INITIAL_VAL;
691

692
    for (i = 0; i < pWindowResInfo->size; ++i) {
H
Haojun Liao 已提交
693
      SResultRow *pResult = pWindowResInfo->pResult[i];
694
      if (pResult->closed) {
695
        numOfClosed += 1;
696 697
        continue;
      }
698

699
      TSKEY ekey = pResult->win.ekey;
700
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
701
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
702 703
        closeTimeWindow(pWindowResInfo, i);
      } else {
704
        skey = pResult->win.skey;
705 706 707
        break;
      }
    }
708

709
    // all windows are closed, set the last one to be the skey
710
    if (skey == TSKEY_INITIAL_VAL) {
711 712 713 714 715
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
716

717
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex]->win.skey;
718

719 720
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
721
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
722
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
723

724
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
725
    } else {
726
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
727
             numOfClosed);
728 729
    }
  }
730

731 732 733 734 735
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
736

737
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
738
  return numOfClosed;
739 740 741
}

/*
 * Count the rows of the current data block, starting at startPos and moving in
 * the query direction, that belong to a window ending at ekey.
 *
 * When the window ends inside the block the count comes from
 * getForwardStepsInBlock(); when it reaches or passes the block edge, every
 * remaining row in the scan direction is counted. If updateLastKey is set,
 * the current table's lastKey is moved one step past the last counted row
 * (or past the block edge in the second case).
 *
 * startPos must be a valid row index of the block; the result is always > 0.
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num   = -1;
  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

H
Haojun Liao 已提交
781 782
/*
 * Run every output function over a contiguous slice of the current block.
 *
 * Nothing is done unless this is the master scan or the window is closed. For
 * each output column the context is given the window start key, the slice
 * length (forwardStep) and its start offset: `offset` for ascending queries,
 * `offset - (forwardStep - 1)` for descending ones. Functions whose aAggs
 * status carries TSDB_FUNCSTATE_SELECTIVITY also get the matching timestamp
 * slice.
 *
 * Pre-aggregated block statistics are valid only when the whole block is
 * consumed, so preAggVals.isSet is cleared while a partial slice is processed
 * and then restored. The flag is saved and restored per context, so a context
 * whose flag differs from that of pCtx[0] keeps its own value.
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      // remember this context's own flag so it can be restored afterwards
      bool hasPrev = pCtx[k].preAggVals.isSet;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      int32_t functionId = pQuery->pExpr1[k].base.functionId;
      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      // NOTE: the original value of isSet have been changed here
      if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }

      // restore it
      pCtx[k].preAggVals.isSet = hasPrev;
    }
  }
}

815
/**
 * Apply every output function to the single row located at the given offset.
 *
 * Nothing happens unless this is the master scan or the window is reported as closed.
 *
 * @param pRuntimeEnv  runtime environment providing the query and the function contexts
 * @param closed       status flag of the window result, as obtained by the caller
 * @param pWin         window the row belongs to; its skey is stored in each context
 * @param offset       position of the row inside the block
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pCur = &pCtxList[idx];
    pCur->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pCur, fid)) {
      continue;
    }

    aAggs[fid].xFunctionF(pCur, offset);
  }
}

H
Haojun Liao 已提交
831 832
/**
 * Move pNext to the following time window and locate the row of the current block at which
 * processing of that window starts.
 *
 * @param pRuntimeEnv     runtime environment providing the query
 * @param pNext           in: current window, out: next window (possibly advanced further, see below)
 * @param pDataBlockInfo  key range and row count of the current block
 * @param primaryKeys     timestamp column of the block, may be NULL
 * @param searchFn        search routine used to locate the start key inside the block
 * @param prevPosition    last row consumed by the previous window, -1 if unknown
 * @return start row position for the window, or -1 when the next window lies outside the block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  getNextTimeWindow(pQuery, pNext);

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  bool outOfBlock = asc ? (pNext->skey > pDataBlockInfo->window.ekey)
                        : (pNext->ekey < pDataBlockInfo->window.skey);
  if (outOfBlock) {
    return -1;
  }

  // the key to look for is the window edge in scan direction, clipped by the start of the query range
  TSKEY startKey = -1;
  if (asc) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  int32_t startPos = 0;
  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling time window query, a special case of sliding time window query:
    // continue right behind the last row of the previous window
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (asc && startKey <= pDataBlockInfo->window.skey) {
    startPos = 0;
  } else if (!asc && startKey >= pDataBlockInfo->window.ekey) {
    startPos = pDataBlockInfo->rows - 1;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  if (primaryKeys == NULL) {
    // no timestamps available, only check that the window overlaps the block
    if (asc) {
      assert(pDataBlockInfo->window.skey <= pNext->ekey);
    } else {
      assert(pDataBlockInfo->window.ekey >= pNext->skey);
    }

    return startPos;
  }

  /*
   * The window may not cover any data (this happens when the time window is too small).
   * In that case advance the window until it contains the timestamp found at startPos.
   */
  TSKEY next = primaryKeys[startPos];

  if (asc && next > pNext->ekey) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
      pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      pNext->ekey += ((next - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
    }
  } else if (!asc && next < pNext->skey) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
      pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      pNext->skey -= ((pNext->skey - next + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
    }
  }

  return startPos;
}

H
Haojun Liao 已提交
907
/**
 * Return the far edge of a time window in scan direction, clipped so that it does not
 * run past the end key of the query range.
 *
 * Ascending queries use the window's ekey and cap it at pQuery->window.ekey; descending
 * queries use the window's skey and raise it to pQuery->window.ekey if it is smaller.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY limit = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > limit) ? limit : pWindow->ekey;
  }

  return (pWindow->skey < limit) ? limit : pWindow->skey;
}

H
hjxilinx 已提交
924 925
/**
 * Find the data of the column with the given column id inside a loaded data block.
 *
 * @param pDataBlock  array of SColumnInfoData entries
 * @param colId       column id to look for
 * @return pData of the first matching column, NULL if no column matches
 *
 * todo binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
939
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
940 941 942
  if (pDataBlock == NULL) {
    return NULL;
  }
943

H
Haojun Liao 已提交
944
  char *dataBlock = NULL;
H
Haojun Liao 已提交
945
  SQuery *pQuery = pRuntimeEnv->pQuery;
946

H
Haojun Liao 已提交
947
  int32_t functionId = pQuery->pExpr1[col].base.functionId;
948
  if (functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
949
    sas->pArithExpr = &pQuery->pExpr1[col];
950

H
Haojun Liao 已提交
951 952
    sas->offset    = (QUERY_IS_ASC_QUERY(pQuery)) ? pQuery->pos : pQuery->pos - (size - 1);
    sas->colList   = pQuery->colList;
953
    sas->numOfCols = pQuery->numOfCols;
H
Haojun Liao 已提交
954
    sas->data      = calloc(pQuery->numOfCols, POINTER_BYTES);
955

H
Haojun Liao 已提交
956 957 958 959
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

960
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
961
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
962
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
963
      SColumnInfo *pColMsg = &pQuery->colList[i];
964

965 966 967 968 969 970 971 972
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
973

974
      assert(dataBlock != NULL);
975
      sas->data[i] = dataBlock;  // start from the offset
976
    }
977

978
  } else {  // other type of query function
H
Haojun Liao 已提交
979
    SColIndex *pCol = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
980
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
981
      SColIndex* pColIndex = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
982 983 984 985
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
986 987
    } else {
      dataBlock = NULL;
988 989
    }
  }
990

991 992 993 994
  return dataBlock;
}

/**
H
Haojun Liao 已提交
995
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
996 997
 * @param pRuntimeEnv
 * @param forwardStep
998
 * @param tsCols
999 1000 1001 1002 1003
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
H
Haojun Liao 已提交
1004 1005
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
                                    SWindowResInfo *pWindowResInfo, __block_search_fn_t searchFn, SArray *pDataBlock) {
1006
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1007 1008
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

1009 1010
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
1011
  if (pDataBlock != NULL) {
1012
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
1013
    tsCols = (TSKEY *)(pColInfo->pData);
1014
  }
1015

1016
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1017 1018 1019
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1020

H
Haojun Liao 已提交
1021
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1022
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1023
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1024
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1025
  }
1026

1027
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1028
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1029
    TSKEY ts = TSKEY_INITIAL_VAL;
1030

H
Haojun Liao 已提交
1031 1032 1033 1034 1035 1036 1037 1038
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1039
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1040
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
1041
      tfree(sasArray);
H
hjxilinx 已提交
1042
      return;
1043
    }
1044

H
Haojun Liao 已提交
1045 1046 1047
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

H
Haojun Liao 已提交
1048
    // in case of repeat scan/reverse scan, no new time window added.
1049
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1050
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1051
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1052

1053
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1054
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1055
    }
1056

1057 1058
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1059

1060
    while (1) {
H
Haojun Liao 已提交
1061 1062
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1063 1064 1065
      if (startPos < 0) {
        break;
      }
1066

1067
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1068
      hasTimeWindow = false;
H
Haojun Liao 已提交
1069
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1070 1071
        break;
      }
1072

1073 1074 1075 1076 1077
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1078
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1079

1080 1081
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1082
    }
1083

1084 1085 1086 1087 1088 1089 1090
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1091
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1092
      int32_t functionId = pQuery->pExpr1[k].base.functionId;
1093
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
1094
        pCtx[k].nStartQueryTimestamp = pDataBlockInfo->window.skey;
1095 1096 1097 1098
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1099

1100
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1101
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1102 1103
      continue;
    }
1104

S
TD-1848  
Shengliang Guan 已提交
1105
    tfree(sasArray[i].data);
1106
  }
1107

S
TD-1848  
Shengliang Guan 已提交
1108
  tfree(sasArray);
1109 1110
}

1111
/**
 * Select (and create on first use) the result row of the group the given value belongs to and
 * make it the output target of all function contexts.
 *
 * @param pRuntimeEnv  runtime environment providing the result buffer and window result info
 * @param pData        pointer to the group-by column value of the current row
 * @param type         data type of the value; float/double abort the query through longjmp
 * @param bytes        size of the value in bytes (ignored for binary/nchar, whose length is embedded)
 * @param groupIndex   group index of the current table, used as uid of the result row
 * @return TSDB_CODE_SUCCESS, or -1 when the value is null or no result row/buffer is available
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // not assign result buffer yet, add new result buffer, TODO remove it
  char* d = pData;
  int16_t len = bytes;
  if (isVarType) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  uint64_t uid = groupIndex;
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, uid);
  if (pResultRow == NULL) {
    return -1;
  }

  if (isVarType) {
    // The row is looked up by this very value, so an already stored key holds the same bytes.
    // Copy the key only once; allocating on every row would leak the previous buffer.
    // NOTE(review): relies on key being NULL for a row that has no key yet - confirm against
    // the result row allocator.
    if (pResultRow->key == NULL) {
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pResultRow->key, pData);
    }
  } else {
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
    }

    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1168
/**
 * Locate the data of the normal (non-tag) group-by column inside a loaded data block.
 *
 * @param pQuery      query providing the group-by expression and the column list
 * @param type        out: data type of the group-by column
 * @param bytes       out: size in bytes of the group-by column
 * @param pDataBlock  array of SColumnInfoData entries of the block
 * @return data of the first non-tag group-by column present in the block, NULL if none is found
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // find the column in the column list of the query to obtain its schema
    int16_t colIndex = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pQuery->colList[c].colId == pGroupCol->colId) {
        colIndex = c;
        break;
      }
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     * the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/**
 * Compare the row at the given offset with the current element of the timestamp buffer.
 *
 * @param pRuntimeEnv  runtime environment providing the timestamp buffer and the function contexts
 * @param offset       position of the row inside the input buffer of the first context
 * @return TS_JOIN_TAG_NOT_EQUALS if the tag differs, TS_JOIN_TS_NOT_EQUALS if the row lies before
 *         the buffer element in scan order, TS_JOIN_TS_EQUAL otherwise
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is handed to tVariantCompare as a pointer above, so it has to be dereferenced here
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, (uint64_t)elem.tag->i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  // a row that is already behind the buffer element in scan order must never show up
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether a function has to be invoked for the current scan.
 *
 * @param pRuntimeEnv  runtime environment providing the query and the scan flag
 * @param pCtx         context of the function
 * @param functionId   id of the function
 * @return true if the function must be executed, false otherwise
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pCellInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // finished results and the dummy tag/ts functions never run
  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (pCellInfo->complete || isDummy) {
    return false;
  }

  // first/first_dst only run while scanning in ascending order
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // last/last_dst: the first parameter denotes the order type they run in
  if (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // every other function is skipped during the reverse scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1275 1276
/**
 * Apply all output functions to one data block, one row at a time.
 *
 * Each row is first checked against the timestamp buffer (if any) and the column filters.
 * A qualified row is then fed either to every time window it belongs to (interval query) or to
 * the result row of its group / the plain output (all other queries).
 *
 * @param pRuntimeEnv     runtime environment providing the query and the function contexts
 * @param pStatis         pre-computed statistics of the block, forwarded to setExecParams
 * @param pDataBlockInfo  key range and row count of the block
 * @param pWindowResInfo  window result information; curIndex is restored after each row
 * @param pDataBlock      loaded column data of the block
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pTableInfo = pQuery->current;

  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
  bool groupByNormalCol = pRuntimeEnv->groupbyNormalCol;

  // the first column is only used as timestamp list when it is of timestamp type
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY*) pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByNormalCol) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
  }

  // set the input column data
  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[f];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow);

      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      // feed the row to every following window that still contains its timestamp
      for (;;) {
        getNextTimeWindow(pQuery, &nextWin);

        bool beyondQueryRange = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey > pQuery->window.ekey)
                                                           : (nextWin.ekey < pQuery->window.ekey);
        if (beyondQueryRange) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByNormalCol) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, pTableInfo->groupIndex);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pExpr1[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else {
    TSKEY blockEdge = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    pTableInfo->lastKey = blockEdge + step;
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    pTableInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pExpr1[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1441
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1442
  SQuery *pQuery = pRuntimeEnv->pQuery;
1443

H
hjxilinx 已提交
1444 1445
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1446

H
Haojun Liao 已提交
1447
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1448
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1449
  } else {
1450
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1451
  }
1452

1453
  // update the lastkey of current table
1454
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1455
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1456

1457
  // interval query with limit applied
1458
  int32_t numOfRes = 0;
1459
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
1460 1461
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1462
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1463

1464 1465 1466 1467
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1468

1469 1470 1471
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1472

1473 1474 1475
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1476 1477 1478 1479 1480

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1481
    }
1482
  }
1483

1484
  return numOfRes;
1485 1486
}

H
Haojun Liao 已提交
1487
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1488
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1489

H
Haojun Liao 已提交
1490 1491
  int32_t functionId = pQuery->pExpr1[colIndex].base.functionId;
  int32_t colId = pQuery->pExpr1[colIndex].base.colInfo.colId;
1492

1493
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1494
  pCtx->hasNull = hasNullValue(&pQuery->pExpr1[colIndex].base.colInfo, pStatis, &tpField);
1495
  pCtx->aInputElemBuf = inputData;
1496

1497
  if (tpField != NULL) {
H
Haojun Liao 已提交
1498
    pCtx->preAggVals.isSet  = true;
1499 1500
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1501 1502 1503
  } else {
    pCtx->preAggVals.isSet = false;
  }
1504

H
Haojun Liao 已提交
1505 1506
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1507 1508
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1509

H
Haojun Liao 已提交
1510
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1511 1512
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1513

1514 1515
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1516
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1517
  }
1518

1519 1520 1521 1522 1523
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1524
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1525
    /*
H
Haojun Liao 已提交
1526
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
H
Haojun Liao 已提交
1527
     * timestamp column, and the y-value is the column specified in pQuery->pExpr1[i].colIdxInBuffer
1528 1529 1530 1531 1532
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
H
Haojun Liao 已提交
1533 1534
      SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);
      STwaInfo *pTWAInfo = (STwaInfo*) GET_ROWCELL_INTERBUF(pInfo);
1535 1536 1537
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1538

1539 1540
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1541 1542 1543 1544 1545 1546
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1547
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1548 1549 1550
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1551
    pInterpInfo->type = (int8_t)pQuery->fillType;
1552 1553
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1554

1555 1556 1557 1558
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1559 1560 1561
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1562 1563
      }
    }
H
Haojun Liao 已提交
1564 1565 1566
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1567
  }
1568

1569 1570 1571 1572 1573 1574
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1575
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1576 1577 1578
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1579
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1580 1581 1582 1583 1584 1585
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1586
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1587 1588
  SQuery* pQuery = pRuntimeEnv->pQuery;

1589
  if (isSelectivityWithTagsQuery(pQuery)) {
1590
    int32_t num = 0;
1591
    int16_t tagLen = 0;
1592

1593
    SQLFunctionCtx *p = NULL;
1594
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1595 1596 1597
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1598

1599
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1600
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;
1601

1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1615 1616 1617 1618 1619
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
S
TD-1848  
Shengliang Guan 已提交
1620
      tfree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1621
    }
1622
  }
H
Haojun Liao 已提交
1623 1624

  return TSDB_CODE_SUCCESS;
1625 1626
}

1627
/*
 * Allocate and initialise the per-output function contexts of a query.
 *
 * For every output expression the input type/bytes are resolved (table name tag,
 * tag column, user defined column or normal column), the output type/bytes and the
 * function parameters are copied into the context, and the byte offsets of the
 * output columns (offset[]) and of the result cells (rowCellInfoOffset[]) are
 * accumulated. For top/bottom/diff functions, param[1..3] additionally carry the
 * order column id, the requested order and the function id.
 *
 * pRuntimeEnv  runtime environment to set up; pRuntimeEnv->pQuery must be valid
 * order        scan order stored in param[2] of top/bottom/diff contexts
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY after releasing
 * everything that was allocated here.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->offset = calloc(pQuery->numOfOutput, sizeof(int16_t));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));

  if (pRuntimeEnv->offset == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // move the "require null" request from the column flag into the context
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type and width according to the kind of source column
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes  = pQuery->pExpr1[i].bytes;
    pCtx->outputType   = pQuery->pExpr1[i].type;

    pCtx->order        = pQuery->order.order;
    pCtx->functionId   = pSqlFuncMsg->functionId;
    pCtx->stableQuery  = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pExpr1[i].interBytes;

    // copy the function arguments; binary/nchar arguments are referenced through argValue.pz
    pCtx->numOfParams  = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the first output column to be a timestamp column
      int32_t f = pQuery->pExpr1[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offsets are running sums over the preceding output columns
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pExpr1[i - 1].interBytes;
    }
  }

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Destroy the parameter variants that were already created; otherwise they are
  // lost once pCtx is released below. The contexts come from calloc, so on the
  // early allocation-failure path numOfParams is still 0 and nothing is destroyed.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  tfree(pRuntimeEnv->pCtx);
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749
/*
 * Clean up the primary and the secondary tsdb query handles of the query and reset
 * both pointers. The memory reference of the query is asserted to be fully
 * released (ref == 0, imem == NULL, mem == NULL) afterwards.
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);

  pEnv->pSecQueryHandle = NULL;
  pEnv->pQueryHandle = NULL;

  SMemRef* pMemRef = &pQInfo->memRef;
  assert(pMemRef->ref == 0 && pMemRef->imem == NULL && pMemRef->mem == NULL);
}

1750 1751 1752 1753
/*
 * Release the resources held by a query runtime environment: the time window
 * result info, the function contexts (parameter variants, tag variant and tag
 * context lists), the fill info, the result buffer, the query handles, the ts
 * buffer, the offset/key buffers, the result row hash table and the result row
 * pool. Does nothing when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[i];

      int32_t k = 0;
      while (k < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[k]);
        ++k;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->pCtx);
  }

  // the destroy functions hand back the value to store in the released slot
  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);

  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

H
Haojun Liao 已提交
1793
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1794

H
Haojun Liao 已提交
1795
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;}
H
hjxilinx 已提交
1796

H
Haojun Liao 已提交
1797 1798 1799
/*
 * Check whether the query is a fixed output query.
 *
 * Interval queries never are. Top/bottom queries and group-by-normal-column
 * queries always are. Otherwise the query qualifies as soon as one output
 * function, other than the skipped timestamp columns, is not flagged as a
 * multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[i].base;

    // ignore the ts_comp function: a single-parameter projection of the primary
    // timestamp column in the first output position
    bool leadingTsProjection = (i == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool tsColumn = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || tsColumn) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1829
// todo refactor with isLastRowQuery
1830
bool isPointInterpoQuery(SQuery *pQuery) {
1831
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1832
    int32_t functionID = pQuery->pExpr1[i].base.functionId;
1833
    if (functionID == TSDB_FUNC_INTERP) {
1834 1835 1836
      return true;
    }
  }
1837

1838 1839 1840 1841
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1842
static bool isSumAvgRateQuery(SQuery *pQuery) {
1843
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1844
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
1845 1846 1847
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1848

1849 1850 1851 1852 1853
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1854

1855 1856 1857
  return false;
}

H
hjxilinx 已提交
1858
// Returns true if at least one output expression of the query is TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool found = false;

  for (int32_t i = 0; i < pQuery->numOfOutput && !found; ++i) {
    found = (pQuery->pExpr1[i].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return found;
}

H
hjxilinx 已提交
1869
/*
 * Report whether the query contains a function that requires a reverse scan:
 *  - first/first_dst while the query itself is not an ascending query, or
 *  - last/last_dst whose first argument (the scan order needed to acquire the
 *    last result of the column) differs from the order of the query.
 * ts, ts_dummy and tag columns are not considered.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[i].base;
    int32_t      fid = pFunc->functionId;

    bool firstFunc = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    bool lastFunc = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);

    if (firstFunc) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        return true;
      }
    } else if (lastFunc) {
      // the scan order to acquire the last result of the specified column
      int32_t requiredOrder = (int32_t)pFunc->arg->argValue.i64;
      if (requiredOrder != pQuery->order.order) {
        return true;
      }
    }
    // any other function, including ts/ts_dummy/tag, has no influence here
  }

  return false;
}
H
hjxilinx 已提交
1891

H
Haojun Liao 已提交
1892 1893 1894 1895
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1896 1897
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1898
    SExprInfo* pExprInfo = &pQuery->pExpr1[i];
H
Haojun Liao 已提交
1899 1900

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1901 1902 1903 1904

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1905
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1906 1907 1908
      return false;
    }
  }
1909

H
hjxilinx 已提交
1910 1911 1912
  return true;
}

1913 1914
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1915
/*
 * Compute the time window, aligned by the interval setting of the query, that
 * starts at the truncated value of key.
 *
 * pQuery    query providing interval, intervalUnit and precision
 * key       timestamp to align; must lie within [keyFirst, keyLast]
 * keyFirst  lower bound of the query range
 * keyLast   upper bound of the query range
 * win       output: win->skey is the truncated key, win->ekey the inclusive end
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * if the realSkey > INT64_MAX - pQuery->interval.interval, the query duration between
   * realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  bool addByUnit = (pQuery->interval.intervalUnit == 'n') || (pQuery->interval.intervalUnit == 'y');
  if (addByUnit) {
    // units 'n' and 'y' go through taosTimeAdd instead of a plain addition
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Decide whether the result buffer has to be checked during the scan and store
 * the decision in pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column queries.
 * Otherwise it is 1 only if the query has at least one function besides the
 * ts/ts_dummy columns and all of these functions are multi-output functions.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    bool tsColumn = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY);
    if (!tsColumn) {
      multiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
      if (!multiOutput) {
        break;  // a single non multi-output function settles the result
      }
    }
  }

  if (multiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1959
bool colIdCheck(SQuery *pQuery) {
1960 1961
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1962
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1963
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1964 1965 1966
      return false;
    }
  }
1967

1968 1969 1970 1971 1972 1973
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
//
// Returns true if every output function, apart from the ts/ts_dummy/tag/tag_dummy
// columns, is either functId or functIdDst.
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    bool auxiliaryCol = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                        (fid == TSDB_FUNC_TAG_DUMMY);
    bool expected = (fid == functId) || (fid == functIdDst);

    if (!auxiliaryCol && !expected) {
      return false;
    }
  }

  return true;
}

// true if onlyOneQueryType() accepts the query for the first/first_dst function pair
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// true if onlyOneQueryType() accepts the query for the last/last_dst function pair
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1994
// todo refactor, add iterator
1995 1996
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1997
  for(int32_t i = 0; i < t; ++i) {
1998
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1999 2000 2001

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
2002
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
2003

2004 2005 2006 2007
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
2008 2009 2010 2011
    }
  }
}

2012
/*
 * Adjust the scan order (and, where needed, the query time range) according to
 * the kind of functions in the query:
 *  - last_row query: always ascending, range normalised to skey <= ekey
 *  - group by normal column in descending order: switched to ascending
 *  - point interpolation without interval: always ascending
 *  - query that only contains first()/last(): ascending for first, descending for
 *    last; for interval queries this is applied to super table queries only
 *
 * Whenever skey/ekey are exchanged for the group-by and first/last cases, the
 * lastKey of the tables is updated through doExchangeTimeWindow().
 *
 * pQInfo       query info; its address is used in the log messages
 * pQueryMsg    not used by this function
 * stableQuery  true if the query runs against a super table
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // common log message used whenever the scan order and the query range are changed
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      // log with pQInfo itself: GET_QINFO_ADDR() expects the runtime env address, not pQuery
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  // an interval query keeps its order unless it is a super table query
  bool intervalQuery = (pQuery->interval.interval != 0);
  if (intervalQuery && !stableQuery) {
    return;
  }

  if (onlyFirstQuery(pQuery)) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, intervalQuery ? "only-first stable" : "only-first", pQuery->order.order, TSDB_ORDER_ASC,
             pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
      doExchangeTimeWindow(pQInfo, &pQuery->window);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
  } else if (onlyLastQuery(pQuery)) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, intervalQuery ? "only-last stable" : "only-last", pQuery->order.order, TSDB_ORDER_DESC,
             pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
      doExchangeTimeWindow(pQInfo, &pQuery->window);
    }

    pQuery->order.order = TSDB_ORDER_DESC;
  }
}

/*
 * Pick the initial number of result pages for the query:
 *  - 128 for a group-by-normal-column query
 *  - the number of tables in tableqinfoGroupInfo, but at least 16, for an interval
 *    (time window) query
 *  - 1 in all other cases
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  // for super table query, one page for each subset (pQInfo->pSidSet->numOfSubSet)
  int32_t num = 1;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    num = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    num = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(num > 0);
  return num;
}

2122 2123
/*
 * Compute the page size and the row size of the intermediate result buffer.
 *
 * pRuntimeEnv  runtime environment; numOfRowsPerPage is updated as a side effect
 * ps           output: page size in bytes, DEFAULT_INTERN_BUF_PAGE_SIZE doubled until
 *              a page (minus the tFilePage header) holds at least 4 rows
 * rowsize      output: pQuery->rowSize multiplied by GET_ROW_PARAM_FOR_MULTIOUTPUT()
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t MIN_ROWS_PER_PAGE = 4;

  int32_t rowBytes = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));
  *rowsize = rowBytes;

  int32_t overhead = sizeof(tFilePage);
  int32_t pageSize = DEFAULT_INTERN_BUF_PAGE_SIZE;

  // grow the page until it accommodates the minimum number of rows
  while ((rowBytes * MIN_ROWS_PER_PAGE) > pageSize - overhead) {
    pageSize = (pageSize << 1u);
  }

  *ps = pageSize;

  pRuntimeEnv->numOfRowsPerPage = (pageSize - sizeof(tFilePage)) / rowBytes;
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2139
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2140

H
Haojun Liao 已提交
2141 2142 2143 2144
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
2145 2146 2147 2148 2149
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
2150

H
Haojun Liao 已提交
2151 2152 2153 2154 2155 2156 2157 2158
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

2159
    // no statistics data, load the true data block
H
Haojun Liao 已提交
2160
    if (index == -1) {
H
Haojun Liao 已提交
2161
      return true;
2162
    }
2163

2164
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
2165
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
H
Haojun Liao 已提交
2166
      return true;
2167
    }
2168

2169
    // all data in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
2170
    if (pDataStatis[index].numOfNull == numOfRows) {
2171 2172 2173 2174 2175 2176 2177 2178 2179

      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
        if (pFilterElem->fp == isNull_filter) {
          return true;
        }
      }

2180 2181
      continue;
    }
2182

H
Haojun Liao 已提交
2183 2184 2185
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
S
TD-1057  
Shengliang Guan 已提交
2186 2187
      float minval = (float)(*(double *)(&pDataBlockst->min));
      float maxval = (float)(*(double *)(&pDataBlockst->max));
2188

2189 2190 2191 2192 2193 2194 2195
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
2196
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
2197 2198 2199 2200 2201
          return true;
        }
      }
    }
  }
2202

H
Haojun Liao 已提交
2203 2204
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2205
      int32_t functionId = pQuery->pExpr1[i].base.functionId;
H
Haojun Liao 已提交
2206 2207 2208 2209 2210
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
2211

H
Haojun Liao 已提交
2212
  return false;
2213 2214
}

H
Haojun Liao 已提交
2215 2216 2217 2218 2219 2220 2221 2222
/*
 * Check whether a boundary of some query time window falls inside the data block,
 * i.e. the block is not contained in one single time window.
 *
 * Starting from the aligned window of the first block key in scan direction
 * (skey for ascending, ekey for descending queries), the windows are advanced with
 * getNextTimeWindow() until they leave the block range.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, sk, ek, &w);
    assert(w.ekey >= pBlockInfo->window.skey);

    // the first window already ends before the block does
    if (w.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    for (getNextTimeWindow(pQuery, &w); w.skey <= pBlockInfo->window.ekey; getNextTimeWindow(pQuery, &w)) {
      assert(w.ekey > pBlockInfo->window.ekey);

      // a following window starts inside the block
      if (w.skey > pBlockInfo->window.skey) {
        return true;
      }
    }

    return false;
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, sk, ek, &w);
  assert(w.skey <= pBlockInfo->window.ekey);

  // the first window already starts after the block does
  if (w.skey > pBlockInfo->window.skey) {
    return true;
  }

  for (getNextTimeWindow(pQuery, &w); w.ekey >= pBlockInfo->window.skey; getNextTimeWindow(pQuery, &w)) {
    assert(w.skey < pBlockInfo->window.skey);

    // a following window ends inside the block
    if (w.ekey < pBlockInfo->window.ekey) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
2264
/*
 * Load the statistics and/or the data of the current block, depending on what the
 * query actually requires.
 *
 * pRuntimeEnv     runtime environment of the query
 * pWindowResInfo  time window result info, used to set the output buffer of interval queries
 * pQueryHandle    tsdb query handle positioned at the block
 * pBlockInfo      description (time range, rows) of the block
 * pStatis         output: block statistics, if they were retrieved
 * pDataBlock      output: block data, if it was retrieved
 * status          output: BLK_DATA_NO_NEEDED, BLK_DATA_STATIS_NEEDED, BLK_DATA_ALL_NEEDED,
 *                 or BLK_DATA_DISCARD when the block is filtered out by its statistics
 *
 * Returns TSDB_CODE_SUCCESS, or terrno if the block data could not be retrieved in
 * the BLK_DATA_ALL_NEEDED case. When *status is BLK_DATA_DISCARD or
 * BLK_DATA_NO_NEEDED, *pDataBlock is left untouched.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  // filters and ts-comp buffers always need the real data
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every output function, stop once all data is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pExpr1[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      // NOTE(review): unlike the BLK_DATA_ALL_NEEDED branch, a NULL result is not checked here
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;

      // a discarded block is neither read from storage nor counted as loaded
      return TSDB_CODE_SUCCESS;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2349
/*
 * Binary search for key in a list of timestamps.
 *
 * pValue  buffer that is read as an array of num TSKEY values
 * num     number of keys in the buffer
 * key     timestamp to look for
 * order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 *
 * TSDB_ORDER_DESC: returns the position of the last key that is <= key, or -1 if
 * key is smaller than the first key.
 * TSDB_ORDER_ASC: returns the position of the first key that is >= key, or -1 if
 * key is larger than the last key.
 * Returns -1 if num <= 0.
 *
 * NOTE(review): the search only works on keys sorted in ascending order, which is
 * presumably guaranteed by the callers - confirm.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keyList = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keyList[hi]) {
        return hi;
      }

      if (key == keyList[lo]) {
        return lo;
      }

      if (key < keyList[lo]) {
        return lo - 1;
      }

      int32_t mid = lo + ((hi - lo + 1) >> 1);
      if (key < keyList[mid]) {
        hi = mid - 1;
      } else if (key > keyList[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keyList[lo]) {
      return lo;
    }

    if (key == keyList[hi]) {
      return hi;
    }

    if (key > keyList[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    int32_t mid = lo + ((hi - lo + 1) >> 1);
    if (key < keyList[mid]) {
      hi = mid - 1;
    } else if (key > keyList[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2412 2413 2414 2415 2416 2417 2418 2419
/*
 * Grow every output column buffer of the query so that it can hold `capacity` rows.
 *
 * Nothing happens when `capacity` is smaller than the current pQuery->rec.capacity. Otherwise each
 * pQuery->sdata[i] is reallocated, the matching function context is pointed at the start of the
 * (possibly moved) buffer, and the recorded capacity is updated.
 *
 * On allocation failure control leaves through
 * longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY); buffers that were already resized keep
 * their new pointers, the failing one keeps its old pointer.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // do the multiplication in size_t: bytes * capacity may not fit into a 32-bit signed integer
    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2440 2441 2442
/*
 * Make sure the output buffers can take all rows of the data block described by pBlockInfo.
 *
 * Only applies when the query is not an interval query, does not group by a normal column, is not
 * a fixed-output query and is not a ts-comp query. If the free room (capacity - rows) is smaller
 * than pBlockInfo->rows, every output column buffer is enlarged by exactly the missing number of
 * rows, the newly usable tail (everything after the rows already produced) is zeroed, and the
 * function contexts are re-pointed at the first free row of the (possibly moved) buffers.
 *
 * On allocation failure control leaves through
 * longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY).
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // do the multiplication in size_t: bytes * newSize may not fit into a 32-bit signed integer
    size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that have been produced so far
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499
/*
 * Initialize the first time window of an interval query from the first data block that is scanned.
 *
 * Runs only for interval queries whose windowResInfo.prevSKey still holds TSKEY_INITIAL_VAL, i.e.
 * it takes effect once. The aligned window is computed by getAlignQueryTimeWindow(); its start key
 * becomes prevSKey. startTime is the aligned start key for ascending queries and
 * pQuery->window.skey for descending ones.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;

  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);

    pInfo->startTime = pQuery->window.skey;
    pInfo->prevSKey = win.skey;
    return;
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
  pInfo->startTime = win.skey;
  pInfo->prevSKey = win.skey;
}

2500 2501
/*
 * Iterate over the data blocks delivered by the active query handle and apply the query functions
 * on each of them.
 *
 * The master scan uses pRuntimeEnv->pQueryHandle, every other scan uses pSecQueryHandle. For each
 * block: the cost counters are updated, a killed query aborts via longjmp, the initial time window
 * and the output buffer are prepared, the block is loaded on demand and - unless it is discarded -
 * handed to tableApplyFunctionsOnBlock().
 *
 * The loop stops when there are no more blocks, when loading a block fails, or when the query
 * status reports a full result buffer / completion. Afterwards a non-zero terrno is raised through
 * longjmp, the query is flagged completed unless the result buffer is full, and for interval
 * queries (master or repeat scan) all time windows are closed on completion.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  STableQueryInfo *pCurrent = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         pQInfo, pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT handle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  for (;;) {
    if (!tsdbNextDataBlock(handle)) {
      break;
    }

    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, handle, &blockInfo, &pStatis,
                                         &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped entirely: move the last key just beyond its far edge
      TSKEY edge = blockInfo.window.skey;
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        edge = blockInfo.window.ekey;
      }

      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, pQInfo,
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return 0;
  }

  if (!IS_MASTER_SCAN(pRuntimeEnv) && pRuntimeEnv->scanFlag != REPEAT_SCAN) {
    return 0;
  }

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    closeAllTimeWindow(&pRuntimeEnv->windowResInfo);

    // point to the last time window
    pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;
  } else {
    assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2580
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2581
  tVariantDestroy(tag);
2582

2583
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2584
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2585
    assert(val != NULL);
2586

H
[td-90]  
Haojun Liao 已提交
2587
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2588
  } else {
2589
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2590 2591 2592 2593
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2594

H
hjxilinx 已提交
2595
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2596
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2597 2598 2599 2600
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2601
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2602
    } else {
H
Haojun Liao 已提交
2603 2604 2605 2606 2607
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2608
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2609
    }
2610
  }
2611 2612
}

2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624
/*
 * Linear lookup of a tag column descriptor by column id.
 *
 * Returns a pointer to the first entry of pTagColList[0 .. numOfTags-1] whose colId equals colId,
 * or NULL when no entry matches.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t idx = 0;
  while (idx < numOfTags) {
    SColumnInfo *pCol = &pTagColList[idx];
    if (pCol->colId == colId) {
      return pCol;
    }

    idx += 1;
  }

  return NULL;
}

2625
/*
 * Load the tag values of pTable into the function contexts of the query.
 *
 * - A query with a single TSDB_FUNC_TS_COMP output only needs the tag column named by the first
 *   function argument; it is stored in pCtx[0].tag.
 * - Otherwise every output expression that refers to a tag column gets its tag value loaded into
 *   the context with the same index. In addition, when the first expression is a ts/prj function
 *   on the primary timestamp column and a TS buffer is present, the tag named by its argument is
 *   loaded into pCtx[0].tag (logged as "join comparison" tag).
 *
 * NOTE(review): doGetTagColumnInfoById() returns NULL when the column id is not found; the result
 * is dereferenced here without a check -- confirm the id is always present in tagColList.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  SExprInfo      *pFirstExpr = &pQuery->pExpr1[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      colId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, colId);

    doSetTagValueInParam(tsdb, pTable, colId, &pCtx[0].tag, pCol->type, pCol->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SExprInfo *pExpr = &pQuery->pExpr1[k];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pCtx[k].tag, pExpr->type, pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pMsg = &pFirstExpr->base;

  bool isTsOrPrj = (pMsg->functionId == TSDB_FUNC_TS || pMsg->functionId == TSDB_FUNC_PRJ);
  if (!isTsOrPrj || pRuntimeEnv->pTSBuf == NULL || pMsg->colInfo.colIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return;
  }

  assert(pMsg->numOfParams == 1);

  int16_t      colId = (int16_t)pFirstExpr->base.arg->argValue.i64;
  SColumnInfo *pCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, colId);

  doSetTagValueInParam(tsdb, pTable, colId, &pCtx[0].tag, pCol->type, pCol->bytes);

  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  int16_t tagType = pCtx[0].tag.nType;

  if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pFirstExpr->base.arg->argValue.i64, pCtx[0].tag.pz);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pFirstExpr->base.arg->argValue.i64, pCtx[0].tag.i64Key);
  }
}

H
Haojun Liao 已提交
2675
/*
 * Feed one result row (pWindowRes) of a table into the merge contexts.
 *
 * mergeFlag == false starts a new output row: every context advances its output buffer by
 * outputBytes, switches to FIRST_STAGE_MERGE, resets its result info and re-runs the init hook of
 * its function. mergeFlag == true keeps the current output row and merges into it.
 *
 * In both cases each context gets `timestamp` as start timestamp and its input element pointed at
 * the column's position inside the result page. For tag / tag-dummy functions the tag variant is
 * rebuilt from that input. Finally the distMergeFunc hook of every function except
 * TSDB_FUNC_TAG_DUMMY is invoked.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SResultRow *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  tFilePage      *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  // first pass: prepare every context
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *ctx = &pCtx[col];
    int32_t         funcId = pQuery->pExpr1[col].base.functionId;

    if (!mergeFlag) {
      ctx->aOutputBuf = ctx->aOutputBuf + ctx->outputBytes;
      ctx->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(ctx->resultInfo);
      aAggs[funcId].init(ctx);
    }

    ctx->hasNull = true;
    ctx->nStartQueryTimestamp = timestamp;
    ctx->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes, page);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&ctx->tag);

    int32_t outType = ctx->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&ctx->tag, varDataVal(ctx->aInputElemBuf), varDataLen(ctx->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&ctx->tag, ctx->aInputElemBuf, ctx->inputBytes, ctx->inputType);
    }
  }

  // second pass: run the merge hook of every function that takes part in the merge
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pExpr1[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtx[col]);
    }
  }
}

2719
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2788
/*
 * Debug helper: dump the intermediate result of a super table query to stdout.
 *
 * pdata holds one page per output column; row j of column i starts at
 * pdata[i]->data + pExpr1[i].bytes * j. One text line is printed per row, the cells are separated
 * by tabs. Only binary, timestamp/bigint, int, float and double columns are printed; cells of any
 * other type are skipped.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pExpr1[col];
      char      *cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY:
          // binary cells carry function specific intermediate data
          printBinaryData(pExpr->base.functionId, cell, pExpr->type);
          break;

        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;

        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;

        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;

        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2823 2824 2825
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2826 2827 2828 2829 2830
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2831

2832 2833
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2834

2835 2836
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2837

2838 2839 2840 2841
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2842

2843 2844 2845 2846
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2847

H
hjxilinx 已提交
2848
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2849
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
2850
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId);
2851

H
Haojun Liao 已提交
2852
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2853
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2854

H
hjxilinx 已提交
2855
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2856
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
2857
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId);
2858

H
Haojun Liao 已提交
2859
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2860
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2861

2862 2863 2864
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2865

2866 2867 2868
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2869
/*
 * Merge the results of the table groups, starting at pQInfo->groupIndex, until one group yields
 * at least one result or all groups have been visited.
 *
 * groupIndex is advanced past every group that was merged successfully. When all groups are done
 * and every page of the current group result has been consumed, the super table query is marked
 * as over. The elapsed time is added to summary.firstStageMergeTime.
 *
 * Returns -1 when mergeIntoGroupResultImpl() reports a failure (negative value), otherwise
 * TSDB_CODE_SUCCESS.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (; pQInfo->groupIndex < numOfGroups;) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t code = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (code > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo *pResInfo = &pQInfo->groupResInfo;

  bool allGroupsVisited = (pQInfo->groupIndex == numOfGroups);
  if (allGroupsVisited && pResInfo->pageId == pResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the disk-based result buffer into the output buffers of the
 * query (pQuery->sdata), at most pQuery->rec.capacity rows per call.
 *
 * When every page of the current group has been consumed, the next group is merged first; if no
 * group is left the super table query is marked as over and nothing is copied. The read position
 * is kept in pQInfo->groupResInfo (pageId, rowId), so a page that does not fit completely is
 * resumed by the next call. On return pQuery->rec.rows holds the number of copied rows.
 *
 * Inside a page the values of one column are stored consecutively, starting at
 * offset[i] * numOfRowsPerPage.
 *
 * Fix compared to the previous version: when a page was resumed at rowId > 0 the copy started
 * again at the first row of the page and, in the "rest of the page fits" case, copied the full
 * page instead of the remaining rows. Rows were delivered twice and the destination buffer could
 * be overrun. The copy now starts at the saved row and covers only the remaining rows.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results of the current group have been returned to the client, try the next group
  if (pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results have been sent to the client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows copied into the output buffers so far
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;

  // TODO add API for releasing non-dirty pages, so that a page can be released once it is copied
  for (int32_t j = pGroupResInfo->pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->rowId < pData->num);

    // first row of this page that has not been returned yet, and the number of rows behind it
    int64_t startRow = pGroupResInfo->rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // only a part of the remaining rows fits: remember where to resume and stop afterwards
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->rowId += numOfCopiedRows;
      done = true;
    } else {
      // the rest of the page fits, continue with the next page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pageId += 1;
      pGroupResInfo->rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;
      char *  pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

2991 2992 2993
/*
 * Number of results held by one result row.
 *
 * The output columns are visited in order; ts, tag and tagprj columns are skipped because they
 * cannot decide the number of results. The numOfRes of the first remaining column that reports a
 * positive count is returned, 0 if there is none.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfCols; ++col) {
    int32_t funcId = pQuery->pExpr1[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool decidesCount = !(funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);
    if (!decidesCount) {
      continue;
    }

    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResultRow, col);
    assert(pCell != NULL);

    if (pCell->numOfRes > 0) {
      return pCell->numOfRes;
    }
  }

  return 0;
}

3016
/*
 * Merge the per-table results of one table group into the group result pages.
 *
 * Only tables that own result pages and at least one time window take part.
 *  - no such table:   nothing to do, returns 0.
 *  - exactly one:     no merge is needed, the group result simply refers to the pages of that
 *                     table (groupId is its table id); returns the number of its pages.
 *  - more than one:   the result rows are merged in timestamp order with a loser tree. Rows with
 *                     the same timestamp are merged into one output row; whenever the output
 *                     buffer holds pQuery->rec.capacity rows it is flushed into result pages.
 *                     Returns pQInfo->groupResInfo.numOfDataPages.
 *
 * Returns -1 when flushing the merged rows fails. Allocation failures and a killed query leave
 * through longjmp(pRuntimeEnv->env, ...) after the temporary buffers have been released.
 *
 * Fixes compared to the previous version:
 *  - a failed flush inside the merge loop returned without releasing pTree, pTableList and
 *    posList.
 *  - a loser tree that could not be created (pTree left NULL) was dereferenced.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that actually have results
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    tfree(posList);
    tfree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  // NOTE(review): a tree pointer that is still NULL is treated as an allocation failure -- confirm
  // that tLoserTreeCreate() has no other failure mode.
  if (pTree == NULL) {
    tfree(pTableList);
    tfree(posList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  SResultRow* pRow = getNewResultRow(pRuntimeEnv->pool);
  resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);

  pQInfo->groupResInfo.groupId = getMergeResultGroupId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      tfree(pTableList);
      tfree(posList);
      tfree(pTree);
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the root of the loser tree names the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SResultRow  *pWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num <= 0) {
      // empty result row, just step over it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

            // release the temporary buffers on this error path as well
            tfree(pTree);
            tfree(pTableList);
            tfree(posList);
            return -1;
          }

          resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SResultRow  *pNextWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      tfree(pTree);
      tfree(pTableList);
      tfree(posList);
      return -1;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  tfree(pTableList);
  tfree(posList);
  tfree(pTree);

  return pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3185 3186
/*
 * Move the rows currently held in the query output columns (pQuery->sdata)
 * into newly requested pages of the disk-based result buffer.
 *
 * The rows are split into chunks of at most pResultBuf->numOfRowsPerPage rows;
 * every chunk is copied column by column into its own page, and
 * pGroupResInfo->numOfDataPages is incremented once per page.
 *
 * @param pRuntimeEnv    runtime environment owning the output columns and the result buffer
 * @param pGroupResInfo  supplies the group id used for the new pages and receives the page count
 * @return always TSDB_CODE_SUCCESS
 *
 * NOTE(review): the page returned by getNewDataBuf() is dereferenced without a
 * NULL check -- confirm that it cannot fail.
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;  // output argument of getNewDataBuf
  int32_t rowsPerPage = pResultBuf->numOfRowsPerPage;
  int32_t total = (int32_t) pQuery->sdata[0]->num;

  // "done" is the number of rows that have already been copied out
  for (int32_t done = 0; done < total; ) {
    int32_t left = total - done;
    int32_t rows = (left > rowsPerPage)? rowsPerPage:left;
    assert(rows > 0);

    // acquire the destination page for this chunk
    tFilePage *pPage = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    pPage->num = rows;

    // copy the chunk of every output column to its slot inside the page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char* dst = pPage->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;
      char* from = ((char *) pQuery->sdata[col]->data) + done * bytes;
      memcpy(dst, from, (size_t)(pPage->num * bytes));
    }

    done += rows;
    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3223 3224 3225
/*
 * Rewind the merge output state: every function context is pointed one row
 * *before* the start of its output column, its size/startOffset are reset, its
 * result cell is re-bound to pRow, and the row counter of each output column
 * is cleared.
 *
 * NOTE(review): aOutputBuf intentionally starts outputBytes before the column
 * data; presumably the merge routine advances it before writing -- confirm.
 */
void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOne = &pCtx[k];

    pOne->aOutputBuf  = pQuery->sdata[k]->data - pOne->outputBytes;
    pOne->size        = 1;
    pOne->startOffset = 0;
    pOne->resultInfo  = getResultCell(pRuntimeEnv, pRow, k);

    pQuery->sdata[k]->num = 0;
  }
}

3236 3237 3238 3239
/*
 * Flip the scan range of one table so that it can be traversed in the opposite
 * direction: the window end is moved to (lastKey + step) unless lastKey still
 * equals the window start, then skey/ekey are swapped, lastKey restarts at the
 * new skey, the ts-cursor order is switched and the window-result cursor is
 * placed on the last window.
 *
 * A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // the query order has been switched already when we get here
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate: win.ekey vs. (lastKey + step) ordering for asc/desc queries

  STimeWindow *pWin = &pTableQueryInfo->win;

  // lastKey == skey means nothing has been produced: keep the window as it is
  if (pTableQueryInfo->lastKey != pWin->skey) {
    pWin->ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pWin->skey, pWin->ekey, TSKEY);
  pTableQueryInfo->lastKey = pWin->skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // start from the last time window
  SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
  pWindowResInfo->curIndex = pWindowResInfo->size - 1;
}

3267 3268
/*
 * For every closed window in pWindowResInfo, update the "complete" flag of each
 * output function's result cell:
 *   - first/first_dst with ascending order, or last/last_dst with descending
 *     order: complete = false
 *   - every other function except TS and TAG: complete = true
 *   - TS and TAG: left untouched
 * Windows that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!getTimeWindowResStatus(pWindowResInfo, i)) {
      continue;  // window is still open
    }

    SResultRow *pRow = getResultRow(pWindowResInfo, i);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functId = pQuery->pExpr1[j].base.functionId;
      SResultRowCellInfo* pInfo = getResultCell(pRuntimeEnv, pRow, j);

      bool firstInAsc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
      bool lastInDesc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

      if (firstInAsc || lastInDesc) {
        pInfo->complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pInfo->complete = true;
      }
    }
  }
}

3293 3294
/*
 * Update the "complete" flag of the output functions before a reverse scan.
 *
 * Group-by-normal-column and interval queries delegate to
 * disableFuncInReverseScanImpl() on the runtime window results. Otherwise the
 * flag is updated directly on each function context's result cell, using the
 * same rule: first/first_dst in ascending order and last/last_dst in
 * descending order become incomplete, all other functions except TS/TAG
 * become complete. Contexts without a result cell are skipped.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pRuntimeEnv, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple result of a table query -- todo refactor
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functId = pQuery->pExpr1[j].base.functionId;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
    if (pCtx->resultInfo == NULL) {
      continue;  // no data has been checked in the previous scan
    }

    bool firstInAsc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
    bool lastInDesc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

    if (firstInAsc || lastInDesc) {
      pCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Prepare every table of every table group for the reverse scan: the per-table
 * query info is flipped by updateTableQueryInfoForReverseScan(), and the
 * resulting lastKey is copied into the matching STableKeyInfo entry, which is
 * used to build the tsdb query handle and to decide its start timestamp.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pInfoList);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pInfo = taosArrayGetP(pInfoList, k);
      updateTableQueryInfoForReverseScan(pQuery, pInfo);

      // propagate the new start key to the key-info entry at the same position
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyList, k);
      pKeyInfo->lastKey = pInfo->lastKey;

      assert(pInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3344
/* Toggle the scan order stored in each output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SWITCH_ORDER(pCtx->order);
  }
}

3351
/*
 * Initialise a result row: the cell-info array is expected to live directly
 * behind the SResultRow header in the same memory block, and the row is marked
 * as not yet bound to any page/row slot (-1).
 *
 * @return always TSDB_CODE_SUCCESS
 */
int32_t initResultRow(SResultRow *pResultRow) {
  // first byte after the header
  pResultRow->pCellInfo = (SResultRowCellInfo*)(pResultRow + 1);

  pResultRow->rowId = -1;
  pResultRow->pageId = -1;

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3358
/*
 * Reset the output state used by queries that write straight into
 * pQuery->sdata: every function context is re-pointed to the start of its
 * output column, its result cell (taken from the result row keyed by tid 0) is
 * reset and re-bound, the column memory is zeroed, and finally the function
 * contexts are re-initialised through initCtxOutputBuf().
 */
void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t tid = 0;
  int64_t uid = getResultInfoUId(pRuntimeEnv);
  SResultRow* pRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&tid, sizeof(tid), true, uid);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    pCtx->aOutputBuf = pQuery->sdata[i]->data;

    // bind a freshly reset result cell; not every function needs the intermediate
    // buffer (e.g. COUNT/TAGPRJ/PRJ/TAG)
    SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pRow, i);
    RESET_RESULT_INFO(pCell);
    pCtx->resultInfo = pCell;

    // top/bottom/diff write their timestamps into the first output column
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    bool needTsOutput = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[i]->data, 0, (size_t)(pQuery->pExpr1[i].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of every function context by `output` rows and
 * reset its result cell.
 *
 * Only functions flagged IS_OUTER_FORWARD have aOutputBuf moved. For
 * top/bottom the timestamp output buffer is moved as well, because those
 * routines write the timestamp column through ptsOutputBuf themselves. DIFF
 * must not reach this function (asserted); it is handled by the multi-output
 * path.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];

    int32_t functionId = pQuery->pExpr1[j].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // move to the next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    // top/bottom fill the timestamp column on their own, so forward it here too
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context and run the aggregate's init
 * routine for those contexts whose result cell is not initialised yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
    int32_t functionId = pQuery->pExpr1[j].base.functionId;

    pCtx->currentStage = 0;

    SResultRowCellInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3433
/*
 * Apply the pending OFFSET to the rows currently held in the output buffer.
 *
 *  - Nothing happens when there are no rows or no offset left.
 *  - When the offset covers all buffered rows, the rows are discarded, the
 *    offset is reduced accordingly, the output buffers are reset and the
 *    "result buffer full" status is cleared.
 *  - Otherwise the first `offset` rows are dropped by shifting the remaining
 *    rows to the front of each output column, the output pointers are placed
 *    behind the remaining rows and the result counters are updated.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // case 1: every buffered row is skipped
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    // the buffer is empty again, so it cannot be full any more
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // case 2: only the leading part of the buffered rows is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    int32_t bytes = pCtx->outputBytes;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[i]->data, (char*)pQuery->sdata[i]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pCtx->aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update the query status word.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other status is OR-ed in
 * after the QUERY_NOT_COMPLETED bit has been cleared, since that bit is not
 * compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Run the xNextStep callback of every output function (TS excluded) and report
 * whether at least one of them is still incomplete afterwards.
 *
 * For group-by-normal-column and interval queries this is done once per closed
 * window, after the output buffers have been bound to that window; otherwise
 * it is done once on the current function contexts.
 *
 * @return true if any visited result cell has complete == false
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    toContinue = false;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int16_t functId = pQuery->pExpr1[j].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
      aAggs[functId].xNextStep(pCtx);

      if (!GET_RES_INFO(pCtx)->complete) {
        toContinue = true;
      }
    }

    return toContinue;
  }

  // one pass per closed window
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SResultRow *pResult = getResultRow(pWindowResInfo, i);
    if (!pResult->closed) {
      continue;
    }

    setResultOutputBuf(pRuntimeEnv, pResult);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int16_t functId = pQuery->pExpr1[j].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
      aAggs[functId].xNextStep(pCtx);

      if (!GET_RES_INFO(pCtx)->complete) {
        toContinue = true;
      }
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3529
/*
 * Take a snapshot of the current scan state: query status, current window
 * index, `start` as the last key, the query window, and the current table's
 * window with its start key replaced by `start`.
 *
 * `start` must not be beyond the table's lastKey in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  assert((start <= pTableQueryInfo->lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (start >= pTableQueryInfo->lastKey && !QUERY_IS_ASC_QUERY(pQuery)));

  SQueryStatusInfo snapshot = {0};
  snapshot.status      = pQuery->status;
  snapshot.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  snapshot.lastKey     = start;

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pTableQueryInfo->win);

  // the window that has to be re-scanned begins at the given start key
  snapshot.curWindow.skey = start;

  return snapshot;
}

3550 3551 3552 3553
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * Steps, in order: save and flip the ts-buffer cursor, set the query window to
 * the reversed window recorded in pStatus, flip the query order, raise the
 * reverse-scan flag, reset the query status, flip the per-function order and
 * "complete" flags, flip every table's scan range, and finally replace the
 * secondary tsdb query handle with one built for the reversed window.
 *
 * If the new query handle cannot be created, control leaves through
 * longjmp(pRuntimeEnv->env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // remember the cursor so that it can be restored after the reverse scan
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // the scanned window, traversed the other way round
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // drop the handle left over from a previous scan, if any
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3599 3600
/*
 * Undo setEnvBeforeReverseScan(): flip the query and per-function order back,
 * restore the saved ts-buffer cursor, return to master-scan mode, and restore
 * lastKey, query status and the time windows from the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the position and status recorded before the reverse scan
  pTableQueryInfo->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // both the table window and the query window go back to the original query window
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

H
Haojun Liao 已提交
3621 3622 3623 3624 3625 3626 3627
/*
 * Set the lastKey of the only table in pTableGroupInfo to the start of the
 * condition's time window. Exactly one table is expected (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray*        pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3628
/*
 * Scan the data blocks of the current table, starting from `start`.
 *
 * The master scan is followed by as many repeat scans over the same range as
 * the output functions request (see needScanDataBlocksAgain()). Every repeat
 * scan gets a fresh secondary tsdb query handle. Afterwards, if the query
 * needs it, one reverse scan is executed and the environment is restored.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when a query handle cannot be
 * created (terrno) or when the query has been killed
 * (TSDB_CODE_TSC_QUERY_CANCELLED).
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the position the scan starts from
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;

      // lastKey is unchanged if no qualified data block has been found
      if (saved.lastKey != pTableQueryInfo->lastKey) {
        saved.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      saved.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // a repeat scan has overwritten the status: put the master-scan status back
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    // set up the next repeat scan over the range covered by the master scan
    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, saved.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // give up early if the query has been killed in the meantime
    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (!needReverseScan(pQuery)) {
    return;
  }

  setEnvBeforeReverseScan(pRuntimeEnv, &saved);

  // scan backwards from the current position
  qDebug("QInfo:%p start to reverse scan", pQInfo);
  doScanAllDataBlocks(pRuntimeEnv);

  clearEnvAfterReverseScan(pRuntimeEnv, &saved);
}

H
hjxilinx 已提交
3708
/*
 * Invoke the xFinalize callback of every output function.
 *
 * For group-by-normal-column and interval queries this happens once per closed
 * window (for group-by queries all windows are closed first), and the number
 * of result rows is recorded in each window's result row. For other queries
 * the callbacks run once on the current function contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pExpr1[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SResultRow *pRow = pWindowResInfo->pResult[i];
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pExpr1[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    // usually one row per group, except for queries such as top/bottom
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/* Return true if at least one output expression is neither TS, TAG nor TAGPRJ. */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;

    bool auxiliary = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (auxiliary) {
      continue;
    }

    return true;
  }

  return false;
}

H
Haojun Liao 已提交
3756
/*
 * Initialise a STableQueryInfo inside the caller-provided memory `buf`.
 *
 * The table window and lastKey are taken from `win`, the ts cursor is marked
 * unset (vgroupIndex = -1), and for interval/group-by queries the per-table
 * window result info is initialised. For other queries windowResInfo is left
 * untouched.
 *
 * @return `buf` as STableQueryInfo*, or NULL if initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;

  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  bool needWindowRes = QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol;
  if (needWindowRes) {
    // initial sizing of the window result list for interval/groupby queries
    int32_t initialSize = 16;
    int32_t initialThreshold = 100;

    if (initWindowResInfo(&pInfo->windowResInfo, initialSize, initialThreshold, TSDB_DATA_TYPE_INT) != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3781
/*
 * Release the resources held inside a STableQueryInfo (its tag variant and its
 * window result info). The structure itself is not freed here. NULL is ignored.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3793
 * @param pDataBlockInfo
3794
 */
H
Haojun Liao 已提交
3795
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3796
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3797 3798 3799
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3800 3801
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3802 3803 3804 3805

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3806

H
Haojun Liao 已提交
3807 3808 3809
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3810

H
Haojun Liao 已提交
3811
  uint64_t uid = getResultInfoUId(pRuntimeEnv);
H
Haojun Liao 已提交
3812
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
3813
      sizeof(groupIndex), true, uid);
H
Haojun Liao 已提交
3814
  if (pResultRow == NULL) {
3815 3816
    return;
  }
3817

3818 3819 3820 3821
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
3822 3823
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3824 3825 3826 3827
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3828

H
Haojun Liao 已提交
3829 3830
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
3831
  setResultOutputBuf(pRuntimeEnv, pResultRow);
3832 3833 3834
  initCtxOutputBuf(pRuntimeEnv);
}

3835
/*
 * Point every function context at the slot of the given result row inside its
 * result-buffer page and bind the row's result cell to the context.
 * Top/bottom/diff additionally get the first context's output position as
 * their timestamp output buffer.
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // there is only a fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int32_t         functionId = pQuery->pExpr1[i].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, pPage);

    bool needTsOutput = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // not every function needs the intermediate result buffer, e.g. COUNT
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, i);
  }
}

H
Haojun Liao 已提交
3858
/*
 * Bind the function contexts to the given result row and initialise them when
 * required.
 *
 * Each context first gets the row's result cell. Contexts whose cell is both
 * initialised and complete are left as they are. All others get their output
 * position inside the result page, currentStage reset to 0, the timestamp
 * output buffer for top/bottom/diff, and a call to the aggregate's init
 * routine if the cell has not been initialised yet.
 */
void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // there is only a fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, i);
    pCtx->resultInfo = pCell;

    bool finished = pCell->initialized && pCell->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, pPage);
    pCtx->currentStage = 0;

    int32_t functionId = pCtx->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3886
/*
 * Set the tag values of the given table in the function contexts and, when a
 * ts-comp buffer is attached, position that buffer for the table.
 *
 * The first time a table is seen (cur.vgroupIndex == -1) the start position of
 * its tag value is looked up in the ts buffer and the resulting cursor is
 * stored in pTableQueryInfo->cur; on later calls the stored cursor is simply
 * restored. Both the master and the supplementary scan need the correct start
 * position.
 *
 * @param pQInfo           query info object
 * @param pTable           table whose tag values are loaded
 * @param pTableQueryInfo  per-table query state holding the tag and the ts cursor
 * @return 0 on success; -1 if no ts-comp data exists for the tag value/vnode.
 *         (The failure path used to `return false`, i.e. 0, which made it
 *         indistinguishable from success.)
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // nothing to position without a ts-comp buffer
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;
  bool      isStrTag = (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR);

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (isStrTag) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return -1;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  }

  if (isStrTag) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3942
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3943 3944
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3945
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3946

3947 3948 3949
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3950
    pTableQueryInfo->win.skey = key;
3951
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3952

3953 3954 3955 3956 3957
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3958

3959 3960 3961 3962 3963 3964
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3965
    STimeWindow     w = TSWINDOW_INITIALIZER;
3966
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3967

H
Haojun Liao 已提交
3968 3969
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3970
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3971
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3972

3973 3974
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3975
        assert(win.ekey == pQuery->window.ekey);
3976
      }
3977

3978
      pWindowResInfo->prevSKey = w.skey;
3979
    }
3980

3981
    pTableQueryInfo->queryRangeSet = 1;
3982
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3983 3984 3985 3986
  }
}

/*
 * Return true when at least one output expression of the query uses a function whose
 * status flags in aAggs[] include TSDB_FUNCSTATE_NEED_TS, false otherwise.
 */
bool requireTimestamp(SQuery *pQuery) {
  bool    needTs = false;
  int32_t idx = 0;

  // stop at the first function that asks for the timestamp
  while (!needTs && idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    needTs = ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
    idx += 1;
  }

  return needTs;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is required when
 *  1. the last key of the current table, or the end key of the query window, lies inside the
 *     time span of the block (the precise row position must then be located), or
 *  2. one of the output functions requests the timestamp, see requireTimestamp().
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;
  TSKEY        lastKey = pQuery->current->lastKey;

  if (lastKey >= pBlockWin->skey && lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

4010
/*
 * Copy rows of the closed window results into the output buffers pQuery->sdata[].
 *
 * The walk starts at the group recorded in pQInfo->groupIndex and moves forward for
 * TSDB_ORDER_ASC or backward for any other order. pQInfo->groupResInfo.rowId keeps the row
 * offset inside a result that could only be copied partially because the output buffer
 * (pQuery->rec.capacity rows) became full; copying stops as soon as the buffer is full.
 *
 * Returns the number of rows placed in the output buffers by this call.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t      numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SResultRow **rows = pResultInfo->pResult;

  // direction of the walk and the index of the first result to visit
  const bool    ascending = (orderType == TSDB_ORDER_ASC);
  const int32_t stride = ascending ? 1 : -1;
  const int32_t first = ascending ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  SGroupResInfo *pGroup = &pQInfo->groupResInfo;
  int32_t        copied = 0;

  for (int32_t idx = first; (idx >= 0) && (idx < numOfClosed); idx += stride) {
    SResultRow *pRow = rows[idx];

    // empty result: move on to the next group
    if (pRow->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroup->rowId = 0;
      continue;
    }

    int32_t rowOffset = pGroup->rowId;
    int32_t toCopy = pRow->numOfRows - pGroup->rowId;

    if (toCopy > pQuery->rec.capacity - copied) {
      // not enough room left for the whole result: copy what fits and remember where to resume
      toCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroup->rowId += toCopy;
    } else {
      // the result is consumed completely
      pGroup->rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *pPage = getResBufPage(pEnv->pResultBuf, pRow->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pEnv, col, pRow, pPage);
      char *dst = pQuery->sdata[col]->data + copied * width;
      memcpy(dst, src + rowOffset * width, width * toCopy);
    }

    copied += toCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4087
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4088
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4089

4090
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4091
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4092

4093
  pQuery->rec.rows += numOfResult;
4094

4095
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4096 4097
}

H
Haojun Liao 已提交
4098
/*
 * For non-interval queries, raise numOfRows of every window result to the largest numOfRes
 * found among its output cells. Cells that belong to TSDB_FUNC_TS, TSDB_FUNC_TAG and
 * TSDB_FUNC_TAGPRJ are not taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t row = 0; row < pRuntimeEnv->windowResInfo.size; ++row) {
    SResultRow *pRow = pRuntimeEnv->windowResInfo.pResult[row];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool counted = (fid != TSDB_FUNC_TS) && (fid != TSDB_FUNC_TAG) && (fid != TSDB_FUNC_TAGPRJ);
      if (counted) {
        SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pRow, col);
        pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pCell->numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4121
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4122
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4123
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4124
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4125

4126
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4127
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4128

H
Haojun Liao 已提交
4129
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4130
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4131
  } else {
4132
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4133 4134 4135
  }
}

H
Haojun Liao 已提交
4136
/*
 * Tell whether a table query still has results that were not handed out yet.
 *
 * - false once the limit is reached (limit > 0 and rec.total >= limit).
 * - Without fill (or for a point interpolation query): true only when the query is completed,
 *   it is a group-by-normal-column or interval query, and window results exist.
 * - With fill: true when the fill info still holds remaining rows; when the query is completed,
 *   true if getNumOfResWithFill() up to the end key of the query window reports rows; false in
 *   every other case.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFill = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillApplies = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillApplies) {
    // results are waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // rows already available in the fill info must be filled and returned first
  int32_t pending = taosNumOfRemainRows(pFill);
  if (pending > 0) {
    return true;
  }

  /*
   * Nothing is pending. While the query is still running, gaps between result blocks are handled
   * after the next data block is retrieved, so there is nothing to report now.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  // query completed: rows may still have to be generated up to the end of the query window
  int32_t total = (int32_t)getNumOfResWithFill(pFill, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return total > 0;
}

H
Haojun Liao 已提交
4177 4178 4179 4180
/*
 * Number of columns in the final result: the count of the second-stage expressions when
 * pExpr2 is present, otherwise the count of the first-stage output expressions.
 */
static int16_t getNumOfFinalResCol(SQuery* pQuery) {
  if (pQuery->pExpr2 != NULL) {
    return pQuery->numOfExpr2;
  }

  return pQuery->numOfOutput;
}

4181
/*
 * Serialize the query result into the response buffer `data`.
 *
 * Layout written by this function:
 *   1. for every final result column, numOfRows values of that column (column after column);
 *   2. the number of table id entries as a 32-bit integer in network byte order;
 *   3. one STableIdInfo per entry with uid, tid and key converted to big-endian.
 *
 * Afterwards, when the query is completed, its status is switched to QUERY_OVER if nothing is
 * left to return (checked separately for super table and normal table queries).
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // the final columns come from pExpr2 when it exists, from pExpr1 otherwise
  bool       hasExpr2 = (pQuery->pExpr2 != NULL);
  SExprInfo *pExprs = hasExpr2 ? pQuery->pExpr2 : pQuery->pExpr1;
  int32_t    numOfCols = hasExpr2 ? pQuery->numOfExpr2 : pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfCols; ++col) {
    int32_t bytes = pExprs[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  // table id list, preceded by its length
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)data = htonl(numOfTables);
  data += sizeof(int32_t);

  STableIdInfo *pDst = (STableIdInfo *)data;
  for (int32_t i = 0; i < numOfTables; ++i, ++pDst) {
    STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    pDst->uid = htobe64(pSrc->uid);
    pDst->tid = htonl(pSrc->tid);
    pDst->key = htobe64(pSrc->key);
  }

  // decide whether the whole query is over
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  bool over = pQInfo->runtimeEnv.stableQuery ? (IS_STASBLE_QUERY_OVER(pQInfo))
                                             : !queryHasRemainResForTableQuery(&pQInfo->runtimeEnv);
  if (over) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

H
Haojun Liao 已提交
4226
/*
 * Generate filled result rows into pQuery->sdata and apply the pending row offset.
 *
 * Each round produces up to rec.capacity rows through taosFillResultDataBlock():
 *  - offset is 0: the rows are returned as they are;
 *  - offset is smaller than the number of produced rows: the first `offset` rows are dropped
 *    by shifting the remaining rows to the front of pDst[], the offset is cleared and the
 *    number of remaining rows is returned;
 *  - otherwise the whole batch is discarded, the offset is reduced accordingly and another
 *    round is started as long as the query still has results; if not, 0 is returned.
 *
 * NOTE: numOfFilled is not referenced by this function.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFill = pRuntimeEnv->pFillInfo;
  SQInfo*    pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  for (;;) {
    int32_t produced = (int32_t)taosFillResultDataBlock(pFill, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset left: hand out the generated rows directly
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFill->numOfRows, produced);
      return produced;
    }

    if (pQuery->limit.offset < produced) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFill->numOfRows, produced, pQuery->limit.offset, produced - pQuery->limit.offset, 0);

      produced -= (int32_t)pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // drop the leading `offset` rows of every column
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pExpr1[col].bytes * pQuery->limit.offset,
                produced * pQuery->pExpr1[col].bytes);
      }

      pQuery->limit.offset = 0;
      return produced;
    }

    // the whole batch is consumed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFill->numOfRows, produced, pQuery->limit.offset, 0,
        pQuery->limit.offset - produced);

    pQuery->limit.offset -= produced;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return 0;
    }
  }
}

4270
/*
 * Finalize the cost summary of a query and write it to the debug log.
 *
 * Side effects on the summary: hashSize is set to the memory used by the result-row hash table
 * plus the table-query-info map, firstStageMergeTime is added to elapsedTime, and the memory
 * size and number of allocated rows of the result row pool are recorded.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo *  pCost = &pEnv->summary;

  // memory held by the two hash tables of this query
  uint64_t hashBytes = taosHashGetMemSize(pEnv->pResultRowHashTable);
  hashBytes += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pCost->hashSize = hashBytes;

  // the merge time counts as elapsed time as well
  pCost->elapsedTime += pCost->firstStageMergeTime;

  SResultRowPool *pPool = pEnv->pool;
  pCost->winInfoSize = getResultRowPoolMemSize(pPool);
  pCost->numOfTimeWindows = getNumOfAllocatedResultRows(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pCost->winInfoSize/1024.0,
      pCost->numOfTimeWindows, pCost->tableInfoSize/1024.0, pCost->hashSize/1024.0);
}

4294 4295
/*
 * Consume the remaining row offset inside the given data block.
 *
 * When the offset equals the number of rows of the block, the block is skipped as a whole:
 * lastKey is moved one step past the block boundary in scan direction and the offset is cleared.
 * Otherwise the scan position is placed on the first row after the skipped rows, lastKey is set
 * to the timestamp of that row, the offset is cleared and the query functions are applied to
 * the block from that position on.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  bool    asc = QUERY_IS_ASC_QUERY(pQuery);

  // every row of this block is skipped
  if (pQuery->limit.offset == pBlockInfo->rows) {
    pCurrent->lastKey = (asc ? pBlockInfo->window.ekey : pBlockInfo->window.skey) + step;
    pQuery->limit.offset = 0;
    return;
  }

  // position of the first row that is not skipped
  pQuery->pos = asc ? (int32_t)pQuery->limit.offset : (pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1);
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // the first column of the retrieved block is read as the timestamp column
  SArray *         pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pBlock, 0);
  TSKEY *          tsList = (TSKEY *) pTsCol->pData;

  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4329

4330 4331 4332 4333 4334
/*
 * Skip the leading rows demanded by the row offset of the query, block by block.
 *
 * Nothing is done when there is no positive offset or when the query has filter columns.
 * Blocks that are smaller than the remaining offset are skipped entirely; the block in which
 * the offset ends is handed to updateOffsetVal(). A killed query, or an error reported through
 * terrno after the iteration, leaves the function with longjmp().
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool skipNeeded = (pQuery->limit.offset > 0) && (pQuery->numOfFilterCols <= 0);
  if (!skipNeeded) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo info = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &info);

    // the offset ends inside this block: locate the start position and stop skipping
    if (pQuery->limit.offset <= info.rows) {
      updateOffsetVal(pRuntimeEnv, &info);
      break;
    }

    // the whole block is skipped, continue right behind it in scan direction
    pQuery->limit.offset -= info.rows;
    pCurrent->lastKey = ((QUERY_IS_ASC_QUERY(pQuery)) ? info.window.ekey : info.window.skey) + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), info.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4368

H
Haojun Liao 已提交
4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418
/*
 * Finish the skipping of time windows once the window offset has reached 0 (asserted).
 *
 * `win` is the last skipped window. If the window following it still overlaps the given data
 * block, the block is loaded, the start position of the next qualified window is located, the
 * query/table start keys are reset to the timestamp of that row and the query functions are
 * applied to the block. Otherwise only the start key of the query window and the previous
 * start key of the window result info are moved to the next window.
 *
 * @param pRuntimeEnv      runtime environment of the query
 * @param win              last time window that was skipped
 * @param pBlockInfo       info of the data block currently under the cursor
 * @param pTableQueryInfo  query info of the table being scanned
 * @return the timestamp of the first row of the next qualified window when it is found in the
 *         current block, otherwise the start key of the next time window
 */
static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  assert(pQuery->limit.offset == 0);
  STimeWindow tw = *win;
  getNextTimeWindow(pQuery, &tw);

  if ((tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {

    // load the data block and check data remaining in current data block
    // TODO optimize performance
    SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
    SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

    // restart from the skipped window; getNextQualifiedWindow() updates tw to the window found
    tw = *win;
    int32_t startPos =
        getNextQualifiedWindow(pRuntimeEnv, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
    assert(startPos >= 0);

    // set the abort info
    pQuery->pos = startPos;

    // reset the query start timestamp
    pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
    pQuery->window.skey = pTableQueryInfo->win.skey;
    TSKEY key = pTableQueryInfo->win.skey;

    pWindowResInfo->prevSKey = tw.skey;
    int32_t index = pRuntimeEnv->windowResInfo.curIndex;

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
    pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
           GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
           pQuery->current->lastKey);

    return key;
  }

  // the next window starts in a later block: only move the window bookkeeping forward
  pQuery->window.skey = tw.skey;
  pWindowResInfo->prevSKey = tw.skey;

  return tw.skey;
}

H
Haojun Liao 已提交
4419
/*
 * Skip the leading time windows demanded by the offset of an interval query.
 *
 * *start is initialised with the last key of the current table. The start position is not
 * forwarded at all when there is no positive offset, or when the query uses value filters,
 * a timestamp buffer or fill. Otherwise the data blocks are walked and the offset is decreased
 * by one for every time window that is passed; since holes may exist between windows, the
 * windows have to be stepped through one at a time. When the offset reaches 0, *start receives
 * the value returned by doSkipIntervalProcess().
 *
 * An error reported through terrno after the block iteration leaves the function with longjmp().
 * The function returns true on every regular path.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  bool noSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL) ||
                (pRuntimeEnv->pFillInfo != NULL);
  if (noSkip) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo * pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo block = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &block);

    // align the first window; for ascending scans only once, for descending scans on every block
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, block.window.ekey, pQuery->window.ekey, block.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, block.window.skey, block.window.skey, pQuery->window.ekey, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow cur = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      STimeWindow next = cur;

      // the current window ends within the current block: it counts as skipped
      bool endsInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (cur.ekey <= block.window.ekey) : (cur.ekey >= block.window.skey);
      if (endsInBlock) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = cur.skey;
      }

      getNextTimeWindow(pQuery, &next);
      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &cur, &block, pCurrent);
        return true;
      }

      // offset is not 0, and the next time window begins or ends in the next block
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (next.skey <= block.window.ekey) : (next.ekey >= block.window.skey);
      if (!nextInBlock) {
        break;
      }

      /*
       * The next time window still starts in the current data block: load the block and find the
       * start position of the next queried time window. Only the timestamp column is used here.
       * TODO: Optimize for this case. Loading is only needed if the first actually required
       * time window resides in the current data block.
       */
      SArray *         pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pBlock, 0);

      // the current window reaches beyond the block: it was not counted above, count it now
      bool crossesBlock = QUERY_IS_ASC_QUERY(pQuery) ? (cur.ekey > block.window.ekey) : (cur.ekey < block.window.skey);
      if (crossesBlock) {
        pQuery->limit.offset -= 1;
      }

      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &cur, &block, pCurrent);
        return true;
      }

      next = cur;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &block, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info and continue with the window that was found
      pQuery->pos = startPos;
      pCurrent->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pWinInfo->prevSKey = next.skey;
      cur = next;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4522 4523
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4524
/*
 * Create the TSDB query handle of the runtime environment, unless the query does not need one.
 *
 * No handle is created (TSDB_CODE_SUCCESS is returned right away) for tag-only queries and for
 * super table queries that are neither interval nor fixed-output queries.
 *
 * The handle is opened with the query window, except for a single-table, ascending,
 * non-interval, non-group-by-normal-column, non-fixed-output query, where the window of that
 * table is used. Depending on the query type the handle is created by tsdbQueryLastRow(),
 * tsdbQueryRowsInExternalWindow() or tsdbQueryTables(). After tsdbQueryLastRow() the query
 * window is replaced by the window stored in the condition and propagated to every table.
 *
 * @return terrno as left by the handle creation (reset to TSDB_CODE_SUCCESS beforehand)
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // plain scan of exactly one table: use the window of that table
  if (!isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pEnv))
  ) {
    SArray *         pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    } else {
      pEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    }

    return terrno;
  }

  pEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the updated window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfMembers = taosArrayGetSize(pGroup);
    for (int32_t m = 0; m < numOfMembers; ++m) {
      STableQueryInfo *pInfo = taosArrayGetP(pGroup, m);

      pInfo->win = pQuery->window;
      pInfo->lastKey = pInfo->win.skey;
    }
  }

  return terrno;
}

H
Haojun Liao 已提交
4588
/*
 * Build the array of column descriptors handed to the fill module.
 *
 * One zero-initialised SFillColInfo is produced for every final result column
 * (as counted by getNumOfFinalResCol). Each entry records the byte width, type
 * and function id of the matching expression, and its offset is the running sum
 * of the byte widths of the columns before it.
 *
 * Returns the calloc'ed array, or NULL when the allocation fails.
 */
static SFillColInfo* createFillColInfo(SQuery* pQuery) {
  int32_t numOfFillCols = getNumOfFinalResCol(pQuery);

  SFillColInfo* pCols = calloc(numOfFillCols, sizeof(SFillColInfo));
  if (pCols == NULL) {
    return NULL;
  }

  // TODO refactor
  // the second expression list, when present, describes the output columns
  SExprInfo* pExprList = (pQuery->pExpr2 != NULL) ? pQuery->pExpr2 : pQuery->pExpr1;

  int32_t rowOffset = 0;
  for (int32_t k = 0; k < numOfFillCols; ++k) {
    SExprInfo*    pExpr = &pExprList[k];
    SFillColInfo* pCol  = &pCols[k];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->tagIndex   = -2;
    pCol->flag       = TSDB_COL_NORMAL;  // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[k];

    rowOffset += pExpr->bytes;
  }

  return pCols;
}

4615
/*
 * Prepare a query for execution.
 *
 * Steps, in order:
 *   1. cache the top/bottom and tag-output flags and apply the scan limitation;
 *   2. open the storage query handle through setupQueryHandle();
 *   3. record tsdb/vgId and initialise the runtime-environment fields;
 *   4. align the traverse order of the ts buffer (if any) with the query order;
 *   5. create the disk based result buffer and the window result info that
 *      matches the query shape;
 *   6. set up the runtime environment and, for fill queries that are not point
 *      interpolation queries, the fill info.
 *
 * @return TSDB_CODE_SUCCESS, or the first non-success code returned by one of
 *         the helpers above.
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  pEnv->topBotQuery   = isTopBottomQuery(pQuery);
  pEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pEnv->pQuery           = pQuery;
  pEnv->pTSBuf           = pTsBuf;
  pEnv->cur.vgroupIndex  = -1;
  pEnv->stableQuery      = isSTableQuery;
  pEnv->prevGroupId      = INT32_MIN;
  pEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse ascending only when the query order equals the order stored in the ts buffer
    int16_t traverseOrder = TSDB_ORDER_DESC;
    if (pQuery->order.order == pTsBuf->tsOrder) {
      traverseOrder = TSDB_ORDER_ASC;
    }

    tsBufSetTraverseOrder(pTsBuf, traverseOrder);
  }

  int32_t pageSize = DEFAULT_PAGE_SIZE;
  int32_t rowSize  = 0;
  getIntermediateBufInfo(pEnv, &pageSize, &rowSize);

  // 10 MB, handed to createDiskbasedResultBuffer()
  int32_t tenMB = 1024 * 1024 * 10;

  if (isSTableQuery && !onlyQueryTags(pEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pEnv->pResultBuf, rowSize, pageSize, tenMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t keyType = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pEnv->groupbyNormalCol) {  // group by columns not tags;
        keyType   = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        keyType   = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pEnv->windowResInfo, 8, threshold, keyType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery) || (!isSTableQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pEnv, &pageSize, &rowSize);

    code = createDiskbasedResultBuffer(&pEnv->pResultBuf, rowSize, pageSize, tenMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // window results are keyed by the group-by column, otherwise by timestamp
    int16_t keyType = TSDB_DATA_TYPE_NULL;
    if (!pEnv->groupbyNormalCol) {
      keyType = TSDB_DATA_TYPE_TIMESTAMP;
    } else {
      keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    }

    code = initWindowResInfo(&pEnv->windowResInfo, numOfResultRows, 1024, keyType);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // NOTE(review): createFillColInfo() returns NULL when its allocation fails; that
    // result is passed to taosInitFillInfo() unchecked -- confirm the callee tolerates it.
    SFillColInfo *pFillCols = createFillColInfo(pQuery);
    STimeWindow   alignedWin = TSWINDOW_INITIALIZER;

    TSKEY lowKey  = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY highKey = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lowKey, highKey, &alignedWin);

    int32_t numOfCols = getNumOfFinalResCol(pQuery);
    pEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, alignedWin.skey, 0, (int32_t)pQuery->rec.capacity, numOfCols,
                                       pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                       pQuery->fillType, pFillCols, pQInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4721
/*
 * Clear the "complete" flag of every output function's result cell, so that the
 * functions are executed again for the next table.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pCell == NULL) {
      continue;  // no result cell is bound to this output column
    }

    pCell->complete = false;
  }
}

H
Haojun Liao 已提交
4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748
/*
 * Set up the execution environment for one data block of one table.
 *
 * Interval queries get their query range set from the block start key and, when
 * tag output or a ts buffer is involved, the additional per-table info.
 * All other queries get the execution context of the table's group, with the
 * key one step past the block end key.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pQInfo->runtimeEnv.pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // interval query
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4749
/*
 * Iterate over all data blocks of the active query handle (the primary handle
 * during the master scan, the secondary handle otherwise) and apply the query
 * functions to every block that is not discarded.
 *
 * The scan stops early when a block belongs to a table that is not found in the
 * table-query-info map or when loading a block fails. A killed query, or a
 * non-success terrno after the loop, leaves through longjmp on pRuntimeEnv->env.
 *
 * @return the difference between the taosGetTimestampMs() readings taken at
 *         exit and at entry.
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT handle = IS_MASTER_SCAN(pRuntimeEnv) ? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // look up the per-table query info by the table id of the block
    STableQueryInfo **ppInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppInfo == NULL) {
      break;
    }

    STableQueryInfo *pInfo = *ppInfo;
    pQuery->current = pInfo;

    // the table window must be ordered like the scan and lie within the query window
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert(
          (pInfo->win.skey <= pInfo->win.ekey) &&
          (pInfo->lastKey >= pInfo->win.skey) &&
          (pInfo->win.skey >= pQuery->window.skey && pInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert(
          (pInfo->win.skey >= pInfo->win.ekey) &&
          (pInfo->lastKey <= pInfo->win.skey) &&
          (pInfo->win.skey <= pQuery->window.skey && pInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pInfo, &blockInfo);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, handle, &blockInfo, &pStatis,
                                        &pDataBlock, &blockStatus);
    if (ret != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // the block is skipped: move lastKey one step past the block edge
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  int64_t endTs = taosGetTimestampMs();
  return endTs - startTs;
}

4824 4825
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4826
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4827

4828
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
4829
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
4830
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
4831

H
Haojun Liao 已提交
4832 4833 4834
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
4835

H
Haojun Liao 已提交
4836
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
4837
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
4838
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
4839

4840
  STsdbQueryCond cond = {
4841
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
4842 4843
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
4844
      .numOfCols = pQuery->numOfCols,
4845
  };
4846

H
hjxilinx 已提交
4847
  // todo refactor
4848
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
4849 4850 4851 4852
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
4853

4854
  taosArrayPush(g1, &tx);
4855
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
4856

4857
  // include only current table
4858 4859 4860 4861
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
4862

H
Haojun Liao 已提交
4863
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
4864 4865
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
4866 4867 4868
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
4869

4870
  if (pRuntimeEnv->pTSBuf != NULL) {
H
Haojun Liao 已提交
4871 4872
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

4873
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
4874
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4875
      // failed to find data with the specified tag value and vnodeId
4876
      if (!tsBufIsValidElem(&elem)) {
H
Haojun Liao 已提交
4877 4878 4879 4880 4881 4882
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

4883
        return false;
H
Haojun Liao 已提交
4884 4885
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4886 4887 4888 4889 4890 4891 4892 4893

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
4894 4895
      }
    } else {
H
Haojun Liao 已提交
4896
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4897
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
4898

H
Haojun Liao 已提交
4899
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4900
        // failed to find data with the specified tag value and vnodeId
4901
        if (!tsBufIsValidElem(&elem1)) {
H
Haojun Liao 已提交
4902 4903 4904 4905 4906
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
4907

H
Haojun Liao 已提交
4908
          return false;
H
Haojun Liao 已提交
4909 4910
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4911 4912 4913 4914 4915
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
4916
        }
H
Haojun Liao 已提交
4917

H
Haojun Liao 已提交
4918 4919
      } else {
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
H
Haojun Liao 已提交
4920
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4921 4922 4923 4924 4925
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
4926
      }
4927 4928
    }
  }
4929

4930
  initCtxOutputBuf(pRuntimeEnv);
4931 4932 4933 4934 4935 4936 4937 4938 4939 4940
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4941
static void sequentialTableProcess(SQInfo *pQInfo) {
4942
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4943
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4944
  setQueryStatus(pQuery, QUERY_COMPLETED);
4945

H
Haojun Liao 已提交
4946
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4947

4948
  if (isPointInterpoQuery(pQuery)) {
H
Haojun Liao 已提交
4949
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
4950
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4951

4952
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4953
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4954

S
TD-1057  
Shengliang Guan 已提交
4955
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4956
             numOfGroups, group);
H
Haojun Liao 已提交
4957 4958 4959 4960 4961 4962 4963

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4964 4965
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4966 4967 4968
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4969

H
Haojun Liao 已提交
4970 4971 4972 4973 4974 4975 4976
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4977

H
Haojun Liao 已提交
4978
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
4979 4980 4981 4982 4983 4984

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4985

H
Haojun Liao 已提交
4986
      initCtxOutputBuf(pRuntimeEnv);
4987

4988
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4989
      assert(taosArrayGetSize(s) >= 1);
4990

4991
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
4992
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4993

H
Haojun Liao 已提交
4994
      // here we simply set the first table as current table
4995 4996 4997
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4998
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4999

H
Haojun Liao 已提交
5000 5001 5002 5003 5004
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
5005

H
Haojun Liao 已提交
5006 5007 5008 5009 5010
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5011 5012 5013 5014 5015 5016

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
5017
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
5018
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
5019
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
5020

S
TD-1057  
Shengliang Guan 已提交
5021
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
5022 5023 5024 5025 5026 5027 5028

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
5029 5030
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5043
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
5044
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
5045

B
Bomin Zhang 已提交
5046 5047
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5048 5049 5050
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5051

5052
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5053 5054
      assert(taosArrayGetSize(s) >= 1);

5055
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5056 5057 5058 5059 5060 5061 5062 5063

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
5064
      taosArrayDestroy(s);
5065 5066 5067 5068 5069
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5070
        pWindowResInfo->pResult[i]->closed = true; // enable return all results for group by normal columns
5071

H
Haojun Liao 已提交
5072
        SResultRow *pResult = pWindowResInfo->pResult[i];
5073
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5074 5075
          SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pResult, j);
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
5076 5077 5078
        }
      }

5079
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5080 5081 5082 5083 5084 5085 5086
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5087
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5088 5089 5090 5091 5092 5093

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5094 5095 5096
    }
  } else {
    /*
5097
     * 1. super table projection query, 2. ts-comp query
5098 5099 5100
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5101
    if (pQInfo->groupIndex > 0) {
5102
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5103
      pQuery->rec.total += pQuery->rec.rows;
5104

5105
      if (pQuery->rec.rows > 0) {
5106 5107 5108
        return;
      }
    }
5109

5110
    // all data have returned already
5111
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5112 5113
      return;
    }
5114

H
Haojun Liao 已提交
5115
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
5116
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5117

H
Haojun Liao 已提交
5118
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5119 5120
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5121

5122
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5123
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5124
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5125
      }
5126

5127
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5128
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5129
        pQInfo->tableIndex++;
5130 5131
        continue;
      }
5132

H
hjxilinx 已提交
5133
      // TODO handle the limit offset problem
5134
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5135 5136
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5137 5138 5139
          continue;
        }
      }
5140

5141
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5142
      skipResults(pRuntimeEnv);
5143

5144
      // the limitation of output result is reached, set the query completed
5145
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5146
        SET_STABLE_QUERY_OVER(pQInfo);
5147 5148
        break;
      }
5149

5150 5151
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5152

5153
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5154 5155 5156 5157 5158 5159
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5160
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5161

H
Haojun Liao 已提交
5162
        STableIdInfo tidInfo = {0};
5163

H
Haojun Liao 已提交
5164 5165 5166
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5167
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5168 5169
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5170
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5171
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5172 5173
          break;
        }
5174

H
Haojun Liao 已提交
5175 5176 5177 5178
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5179
      } else {
5180
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5181 5182
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5183 5184
          continue;
        } else {
5185 5186 5187
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5188 5189 5190
        }
      }
    }
H
Haojun Liao 已提交
5191

5192
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5193 5194
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5195
  }
5196

5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5209
    finalizeQueryResult(pRuntimeEnv);
5210
  }
5211

5212 5213 5214
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5215

5216
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5217 5218
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5219
      pQuery->limit.offset);
5220 5221
}

5222 5223 5224 5225
/*
 * Switch the query into reverse-scan mode.
 *
 * Sets the reverse scan flag, swaps the start/end key of the query window, flips
 * the query order (and the cursor order of the ts buffer, if any), switches the
 * order of the function contexts, disables the functions that must not run in
 * the reverse scan, sets up the per-table query range and finally opens the
 * secondary query handle for the reversed window and order.
 *
 * Leaves through longjmp on pRuntimeEnv->env when the secondary query handle
 * cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // built after the swap/switch above, so it carries the reversed order and window
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer so that it never refers to a released
  // handle while the helpers below run
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5259 5260 5261 5262
/*
 * Leave reverse-scan mode: swap the start/end key of the query window back,
 * flip the cursor order of the ts buffer (if any) and the order of the function
 * contexts, and set the master scan flag again.
 *
 * NOTE(review): pQuery->order.order is flipped by doSaveContext() but is not
 * flipped back here -- confirm that this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;
  STSBuf           *pTsBuf = pEnv->pTSBuf;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pTsBuf != NULL) {
    SWITCH_ORDER(pTsBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5273 5274 5275
/*
 * Close all time window results once a scan has finished.
 *
 * For interval queries the window results are kept per table, so the windows of
 * every table in every group are closed; otherwise only the window result info
 * of the runtime environment is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {  // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = GET_TABLEGROUP(pQInfo, g);

    size_t tableCount = taosArrayGetSize(tables);
    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(tables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables and place the produced rows in the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is performed: the results that were computed before are
 * only copied to the output buffer. Otherwise all tables are scanned once, the time windows are
 * closed, a second scan is run between doSaveContext()/doRestoreContext() if needReverseScan()
 * asks for it, the query is marked QUERY_COMPLETED and the results are merged/copied out.
 *
 * After each stage pQInfo->code and the kill flag are checked; on error or kill the function
 * does not return but leaves through longjmp(env, TSDB_CODE_TSC_QUERY_CANCELLED).
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  // groupIndex > 0: the query itself has been processed already, only copy data to the output buffer
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // first pass over all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // stop right here if the scan failed or the query has been killed
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // every time window result is closed after the first pass
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    // TODO finalizeQueryResult may cause SIGSEGV, since the memory may not be allocated yet; add a cleanup function instead
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery))) {
    // neither an interval query nor a sum/avg rate query
    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
  }

  // report the number of rows of this round and the accumulated total
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

H
Haojun Liao 已提交
5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382

static char *getArithemicInputSrc(void *param, const char *name, int32_t colId) {
  SArithmeticSupport *pSupport = (SArithmeticSupport *) param;
  SExprInfo* pExprInfo = (SExprInfo*) pSupport->exprList;

  int32_t index = -1;
  for (int32_t i = 0; i < pSupport->numOfCols; ++i) {
    if (colId == pExprInfo[i].base.resColId) {
      index = i;
      break;
    }
  }

  assert(index >= 0 && index < pSupport->numOfCols);
  return pSupport->data[index] + pSupport->offset * pExprInfo[index].bytes;
}

/*
 * Evaluate the second-stage expressions (pQuery->pExpr2) on top of the first-stage output.
 *
 * Nothing is done when pQuery->numOfExpr2 is 0. Otherwise every second-stage expression is
 * computed for pQuery->rec.rows rows into a temporary page, and the temporary pages are then
 * copied over pQuery->sdata[0 .. numOfExpr2-1]. Temporary pages are required because the
 * first-stage columns in pQuery->sdata serve as the input while the results are produced.
 *
 * NOTE(review): none of the calloc/malloc results is checked before use.
 */
static void doSecondaryArithmeticProcess(SQuery* pQuery) {
  if (pQuery->numOfExpr2 == 0) {
    return;
  }

  // one temporary page per second-stage expression
  tFilePage **scratch = calloc(pQuery->numOfExpr2, POINTER_BYTES);
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    int32_t width = pQuery->pExpr2[i].bytes;
    scratch[i] = (tFilePage *)malloc(width * pQuery->rec.rows + sizeof(tFilePage));
  }

  // the arithmetic evaluation reads its operands from the first-stage output columns
  SArithmeticSupport sup = {0};
  sup.offset    = 0;
  sup.exprList  = pQuery->pExpr1;
  sup.numOfCols = (int32_t)pQuery->numOfOutput;
  sup.data      = calloc(sup.numOfCols, POINTER_BYTES);

  for (int32_t c = 0; c < sup.numOfCols; ++c) {
    sup.data[c] = pQuery->sdata[c]->data;
  }

  for (int i = 0; i < pQuery->numOfExpr2; ++i) {
    SExprInfo   *pSecExpr = &pQuery->pExpr2[i];
    SSqlFuncMsg *pFunc = &pSecExpr->base;

    if (pFunc->functionId == TSDB_FUNC_ARITHM) {
      // arithmetic expression: calculate the result from several other columns
      sup.pArithExpr = pSecExpr;
      tExprTreeCalcTraverse(sup.pArithExpr->pExpr, (int32_t)pQuery->rec.rows, scratch[i]->data, &sup, TSDB_ORDER_ASC,
                            getArithemicInputSrc);
      continue;
    }

    // plain function: take the first first-stage column with the same function id and column id
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      SExprInfo *pSrcExpr = &pQuery->pExpr1[j];
      if (pFunc->functionId != pSrcExpr->base.functionId || pFunc->colInfo.colId != pSrcExpr->base.colInfo.colId) {
        continue;
      }

      memcpy(scratch[i]->data, pQuery->sdata[j]->data, pSrcExpr->bytes * pQuery->rec.rows);
      break;
    }
  }

  // publish the results to the output columns and release the temporary pages
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    memcpy(pQuery->sdata[i]->data, scratch[i]->data, pQuery->pExpr2[i].bytes * pQuery->rec.rows);
    tfree(scratch[i]);
  }

  tfree(scratch);
  tfree(sup.data);
}

5436 5437 5438 5439 5440 5441
/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5442
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5443
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5444

H
hjxilinx 已提交
5445
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5446
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5447 5448
    return;
  }
5449

H
hjxilinx 已提交
5450
  pQuery->current = pTableInfo;  // set current query table info
5451

5452
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5453
  finalizeQueryResult(pRuntimeEnv);
5454

H
Haojun Liao 已提交
5455 5456 5457 5458
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
  doSecondaryArithmeticProcess(pQuery);

H
Haojun Liao 已提交
5459
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5460
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5461
  }
5462

5463
  skipResults(pRuntimeEnv);
5464
  limitResults(pRuntimeEnv);
5465 5466
}

H
hjxilinx 已提交
5467
/*
 * Process one table for a query that is handled by neither the interval nor the fixed-output
 * processor.
 *
 * The table is scanned repeatedly until rows are produced or the query status contains
 * QUERY_COMPLETED; the result is then limited. When the query is completed, the table id
 * together with the last key of the table is appended to pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->current = pTableInfo;

  // re-initialization is not allowed for a ts_comp query
  if (!isTSCompQuery(pQuery)) {
    resetDefaultResInfoOutputBuf(pEnv);
  }

  skipBlocks(pEnv);

  // an offset is still pending although no filter column exists: mark the query as completed
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQuery->current->lastKey);
    finalizeQueryResult(pEnv);

    pQuery->rec.rows = getNumOfResult(pEnv);

    // with filter columns the offset is applied on the generated rows
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pEnv);
    }

    // leave as soon as there is something to return or the query is completed,
    // otherwise discard the current (empty) result and scan again
    bool finished = (pQuery->rec.rows > 0) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED);
    if (finished) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetDefaultResInfoOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // record the id of the current table and its last key
    STableId    *pId = TSDB_TABLEID(pQuery->current->pTable);
    STableIdInfo info;

    info.uid = pId->uid;
    info.tid = pId->tid;
    info.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &info);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5527
/*
 * Scan the current table and finalize the results, repeated until the query status contains
 * QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * After each scan, when an offset is pending, no fill is requested and either filter columns
 * or a timestamp buffer exist, up to limit.offset closed time windows are dropped and the
 * offset is reduced accordingly.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // records can be ignored here in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    if ((pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQuery->limit.offset > 0 &&
        pQuery->fillType == TSDB_FILL_NONE) {
      int32_t closed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t dropped = (int32_t)(MIN(closed, pQuery->limit.offset));

      clearFirstNWindowRes(pRuntimeEnv, dropped);
      pQuery->limit.offset -= dropped;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5554
// handle time interval query on table
H
hjxilinx 已提交
5555
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5556 5557
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5558 5559
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5560

H
Haojun Liao 已提交
5561
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5562
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5563

5564
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5565 5566 5567 5568 5569 5570
  if (!pRuntimeEnv->groupbyNormalCol) {
    skipTimeInterval(pRuntimeEnv, &newStartKey);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      return;
    }
5571 5572
  }

5573
  while (1) {
H
Haojun Liao 已提交
5574
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5575

H
Haojun Liao 已提交
5576
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5577
      pQInfo->groupIndex = 0;  // always start from 0
5578
      pQuery->rec.rows = 0;
5579
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5580

H
Haojun Liao 已提交
5581
      clearFirstNWindowRes(pRuntimeEnv, pQInfo->groupIndex);
5582
    }
5583

H
Haojun Liao 已提交
5584
    // no result generated, abort
H
Haojun Liao 已提交
5585
    if (pQuery->rec.rows == 0 || pRuntimeEnv->groupbyNormalCol) {
H
Haojun Liao 已提交
5586 5587 5588 5589 5590
      break;
    }

    doSecondaryArithmeticProcess(pQuery);
    
5591
    // the offset is handled at prepare stage if no interpolation involved
H
Haojun Liao 已提交
5592
    if (pQuery->fillType == TSDB_FILL_NONE) {
5593
      limitResults(pRuntimeEnv);
5594 5595
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5596
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
H
Haojun Liao 已提交
5597
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (const tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5598
      numOfFilled = 0;
5599

H
Haojun Liao 已提交
5600
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5601
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5602
        limitResults(pRuntimeEnv);
5603 5604
        break;
      }
5605

5606
      // no result generated yet, continue retrieve data
5607
      pQuery->rec.rows = 0;
5608 5609
    }
  }
5610

5611
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5612
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
H
Haojun Liao 已提交
5613 5614
    // maxOutput <= 0, means current query does not generate any results
    int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
5615

H
Haojun Liao 已提交
5616 5617
    if ((pQuery->limit.offset > 0 && pQuery->limit.offset < numOfClosed) || pQuery->limit.offset == 0) {
      // skip offset result rows
H
Haojun Liao 已提交
5618
      clearFirstNWindowRes(pRuntimeEnv, (int32_t) pQuery->limit.offset);
H
Haojun Liao 已提交
5619 5620 5621 5622

      pQuery->rec.rows   = 0;
      pQInfo->groupIndex = 0;
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
H
Haojun Liao 已提交
5623
      clearFirstNWindowRes(pRuntimeEnv, pQInfo->groupIndex);
H
Haojun Liao 已提交
5624 5625 5626 5627

      doSecondaryArithmeticProcess(pQuery);
      limitResults(pRuntimeEnv);
    }
5628 5629 5630
  }
}

5631 5632 5633 5634
/*
 * Entry of the query on a single table (exactly one table is expected, enforced by assert).
 *
 * If results remain from a previous round they are returned first: either by filling gaps
 * (fill type set) or by copying the remaining window results. Otherwise the query is dispatched
 * to the interval, fixed-output or multi-output processor and the time spent is added to
 * summary.elapsedTime.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (queryHasRemainResForTableQuery(pEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * Results remain that have not been returned yet due to result interpolation, so stay in
       * this procedure instead of launching the retrieve procedure for the next results.
       */
      int32_t filled = 0;
      pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &filled);

      if (pQuery->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNWindowRes(pEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // no window result is left
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *firstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTable = taosArrayGetP(firstGroup, 0);

  // group by normal column, sliding window query and interval query go to the interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pEnv->groupbyNormalCol) {
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {
    // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5696
/*
 * Entry of the query that is dispatched either to multiTableQueryProcess() or to
 * sequentialTableProcess().
 *
 * Interval queries, and fixed-output queries that are neither point interpolation queries nor
 * group-by-normal-column queries, go to multiTableQueryProcess(); everything else goes to
 * sequentialTableProcess(). The time spent is added to summary.elapsedTime.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  bool multiTable = QUERY_IS_INTERVAL_QUERY(pQuery) ||
                    (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && (!pEnv->groupbyNormalCol));

  if (!multiTable) {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5717
/*
 * Find the position of the column referenced by an expression.
 *
 * @param pQueryMsg  query message providing numOfTags, numOfCols and colList
 * @param pExprMsg   expression whose colInfo (flag and colId) is looked up
 * @param pTagCols   tag column list, searched when the column is flagged as a tag
 * @return  - TSDB_TBNAME_COLUMN_INDEX for a tag column whose id is TSDB_TBNAME_COLUMN_INDEX
 *          - the index within pTagCols for any other tag column
 *          - TSDB_UD_COLUMN_INDEX for a user specified column
 *          - the index within pQueryMsg->colList for the remaining columns
 *          A column that cannot be found triggers assert(0); -1 is returned if asserts are off.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t t = 0; t < pQueryMsg->numOfTags; ++t) {
      if (pTagCols[t].colId == pExprMsg->colInfo.colId) {
        return t;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {
    // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t c = 0; c < pQueryMsg->numOfCols; ++c) {
      if (pQueryMsg->colList[c].colId == pExprMsg->colInfo.colId) {
        return c;
      }
    }
  }

  // the referenced column does not exist in the source
  assert(0);
  return -1;
}

5748 5749 5750
/*
 * Check the column referenced by an expression: the index reported by
 * getColumnIndexInSource() has to be less than the number of columns or less than the
 * number of tags of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t pos = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (pos < pQueryMsg->numOfCols) {
    return true;
  }

  return pos < pQueryMsg->numOfTags;
}

5753
/*
 * Basic sanity check of a query message.
 *
 * @return false (after logging the offending value) if the interval is negative, the number of
 *         tables is not positive, the number of group-by columns is negative, or the number of
 *         output columns is not within [1, TSDB_MAX_COLUMNS]; true otherwise.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  // interval time must not be negative
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
    return false;
  }

  // at least one table is required
  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  // number of group-by columns must not be negative
  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  // number of output columns: 1 .. TSDB_MAX_COLUMNS
  if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/*
 * Validate the number of source columns and tags carried by a query message.
 *
 * @param pQueryMsg  query message whose numOfCols, numOfTags and numOfOutput are inspected
 * @param pExprMsg   array of pQueryMsg->numOfOutput expression messages
 * @return true if the counts are acceptable, false otherwise
 *
 * The message is rejected (and logged) when either count is negative or when their sum exceeds
 * TSDB_MAX_COLUMNS. If neither a source column nor a tag is specified, every output expression
 * has to be TSDB_FUNC_TAGPRJ, or TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT applied to
 * TSDB_TBNAME_COLUMN_INDEX.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  // Both counts are taken from the message. The sum is computed in 64 bits, since adding them as
  // 32-bit signed integers may overflow (undefined behaviour) and slip past the upper-bound check.
  int64_t numOfTotal = (int64_t)pQueryMsg->numOfCols + (int64_t)pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only a few expressions can be evaluated without one
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
      continue;
    }

    return false;
  }

  return true;
}

5799
/*
 * Build the table id list from the serialized STableIdInfo entries found at pMsg.
 *
 * @param pQueryMsg     query message; numOfTables entries are consumed (must be > 0, by assert)
 * @param pMsg          start of the serialized entries; the entries are byte-order converted in place
 * @param pTableIdList  receives a newly created array holding a copy of every converted entry
 * @return              position in the message right behind the last consumed entry
 *
 * NOTE(review): the result of taosArrayInit() is not checked before entries are pushed.
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  char *cursor = pMsg;
  for (int32_t n = 0; n < pQueryMsg->numOfTables; ++n, cursor += sizeof(STableIdInfo)) {
    STableIdInfo *pEntry = (STableIdInfo *)cursor;

    // convert the fields of the entry in place
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return cursor;
}
5817

5818
/**
H
hjxilinx 已提交
5819
 * pQueryMsg->head has been converted before this function is called.
5820
 *
H
hjxilinx 已提交
5821
 * @param pQueryMsg
5822 5823 5824 5825
 * @param pTableIdList
 * @param pExpr
 * @return
 */
H
Haojun Liao 已提交
5826
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr, SSqlFuncMsg ***pSecStageExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5827
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5828 5829
  int32_t code = TSDB_CODE_SUCCESS;

5830 5831 5832 5833
  if (taosCheckVersion(pQueryMsg->version, version, 3) != 0) {
    return TSDB_CODE_QRY_INVALID_MSG;
  }

5834 5835 5836 5837
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5838 5839 5840 5841 5842 5843
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5844 5845
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5846

5847 5848
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5849
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5850
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5851 5852

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5853
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5854
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5855 5856 5857
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5858
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5859
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5860
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5861
  pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput);
5862

5863
  // query msg safety check
5864
  if (!validateQueryMsg(pQueryMsg)) {
5865 5866
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5867 5868
  }

H
hjxilinx 已提交
5869 5870
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5871 5872
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5873
    pColInfo->colId = htons(pColInfo->colId);
5874
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5875 5876
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5877

H
hjxilinx 已提交
5878
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5879

H
hjxilinx 已提交
5880
    int32_t numOfFilters = pColInfo->numOfFilters;
5881
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5882
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5883 5884 5885 5886
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5887 5888 5889
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5890
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5891

5892 5893
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5894 5895 5896

      pMsg += sizeof(SColumnFilterInfo);

5897 5898
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5899

5900
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5901 5902 5903 5904 5905
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5906
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5907
        pMsg += (pColFilter->len + 1);
5908
      } else {
5909 5910
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5911 5912
      }

5913 5914
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5915 5916 5917
    }
  }

5918
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5919 5920 5921 5922 5923
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5924
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5925

5926
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5927
    (*pExpr)[i] = pExprMsg;
5928

5929
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5930
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
H
Haojun Liao 已提交
5931 5932 5933 5934
    pExprMsg->colInfo.flag  = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId    = htons(pExprMsg->functionId);
    pExprMsg->numOfParams   = htons(pExprMsg->numOfParams);
    pExprMsg->resColId      = htons(pExprMsg->resColId);
5935

5936
    pMsg += sizeof(SSqlFuncMsg);
5937 5938

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5939
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5940 5941 5942 5943
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5944
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5945 5946 5947 5948 5949
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5950 5951
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5952
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5953 5954
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5955 5956
      }
    } else {
5957
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5958
//        return TSDB_CODE_QRY_INVALID_MSG;
5959
//      }
5960 5961
    }

5962
    pExprMsg = (SSqlFuncMsg *)pMsg;
5963
  }
5964

5965
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5966
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5967
    goto _cleanup;
5968
  }
5969

H
Haojun Liao 已提交
5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012
  if (pQueryMsg->secondStageOutput) {
    pExprMsg = (SSqlFuncMsg *)pMsg;
    *pSecStageExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES);
    
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      (*pSecStageExpr)[i] = pExprMsg;

      pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
      pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
      pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
      pExprMsg->functionId = htons(pExprMsg->functionId);
      pExprMsg->numOfParams = htons(pExprMsg->numOfParams);

      pMsg += sizeof(SSqlFuncMsg);

      for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
        pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
        pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

        if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
          pExprMsg->arg[j].argValue.pz = pMsg;
          pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
        } else {
          pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
        }
      }

      int16_t functionId = pExprMsg->functionId;
      if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
        if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
          code = TSDB_CODE_QRY_INVALID_MSG;
          goto _cleanup;
        }
      } else {
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_QRY_INVALID_MSG;
//      }
      }

      pExprMsg = (SSqlFuncMsg *)pMsg;
    }
  }

H
hjxilinx 已提交
6013
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
6014

H
hjxilinx 已提交
6015
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
6016
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
6017 6018 6019 6020
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
6021 6022 6023

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
6024
      pMsg += sizeof((*groupbyCols)[i].colId);
6025 6026

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
6027 6028
      pMsg += sizeof((*groupbyCols)[i].colIndex);

6029
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
6030 6031 6032 6033 6034
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
6035

H
hjxilinx 已提交
6036 6037
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
6038 6039
  }

6040 6041
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
6042
    pQueryMsg->fillVal = (uint64_t)(pMsg);
6043 6044

    int64_t *v = (int64_t *)pMsg;
6045
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6046 6047
      v[i] = htobe64(v[i]);
    }
6048

6049
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
6050
  }
6051

6052 6053
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6054 6055 6056 6057 6058
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

6059 6060
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
6061

6062 6063 6064 6065
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
6066

6067
      (*tagCols)[i] = *pTagCol;
6068
      pMsg += sizeof(SColumnInfo);
6069
    }
H
hjxilinx 已提交
6070
  }
6071

6072 6073 6074
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
6075 6076 6077 6078 6079 6080

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
6081 6082 6083
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
6084

weixin_48148422's avatar
weixin_48148422 已提交
6085
  if (*pMsg != 0) {
6086
    size_t len = strlen(pMsg) + 1;
6087

6088
    *tbnameCond = malloc(len);
6089 6090 6091 6092 6093
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
6094
    strcpy(*tbnameCond, pMsg);
6095
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
6096
  }
6097

6098
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
6099 6100
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
6101
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
6102
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
6103 6104

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
6105 6106

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
6107
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
6108 6109
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
6110 6111 6112 6113
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
6114 6115

  return code;
6116 6117
}

H
Haojun Liao 已提交
6118 6119
/*
 * Decode the serialized arithmetic expression stored in the first argument of
 * pArithExprInfo->base and attach the resulting expression tree to pArithExprInfo->pExpr.
 *
 * Returns TSDB_CODE_SUCCESS when a tree was produced, the code caught from the decoder when it
 * raises one, or TSDB_CODE_QRY_APP_ERROR when the decoder yields no tree at all.
 */
static int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary", pQueryMsg);

  SSqlFuncMsg *pFuncMsg = &pArithExprInfo->base;
  tExprNode   *pRoot = NULL;

  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pFuncMsg->arg[0].argValue.pz, pFuncMsg->arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pFuncMsg->arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    // hand the decoded tree over to the expression descriptor
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pFuncMsg->arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
6139
static int32_t createQueryFuncExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
6140 6141
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
6142
  int32_t code = TSDB_CODE_SUCCESS;
6143

H
Haojun Liao 已提交
6144
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
6145
  if (pExprs == NULL) {
6146
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
6147 6148 6149 6150 6151
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

H
Haojun Liao 已提交
6152
  for (int32_t i = 0; i < numOfOutput; ++i) {
6153
    pExprs[i].base = *pExprMsg[i];
6154
    pExprs[i].bytes = 0;
6155 6156 6157 6158

    int16_t type = 0;
    int16_t bytes = 0;

6159
    // parse the arithmetic expression
6160
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
6161
      code = buildArithmeticExprFromMsg(&pExprs[i], pQueryMsg);
6162

6163
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6164
        tfree(pExprs);
6165
        return code;
6166 6167
      }

6168
      type  = TSDB_DATA_TYPE_DOUBLE;
6169
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
6170
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
6171
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6172
      type = s.type;
H
Haojun Liao 已提交
6173
      bytes = s.bytes;
6174 6175
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
6176 6177
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6178 6179
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6180 6181 6182 6183 6184

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6185
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
6186
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6187

dengyihao's avatar
dengyihao 已提交
6188
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6189 6190 6191 6192
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6193
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6194

H
Haojun Liao 已提交
6195 6196 6197
        type  = s.type;
        bytes = s.bytes;
      }
6198 6199
    }

S
TD-1057  
Shengliang Guan 已提交
6200
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6201
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6202
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6203
      tfree(pExprs);
6204
      return TSDB_CODE_QRY_INVALID_MSG;
6205 6206
    }

6207
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6208
      tagLen += pExprs[i].bytes;
6209
    }
6210
    assert(isValidDataType(pExprs[i].type));
6211 6212 6213
  }

  // TODO refactor
H
Haojun Liao 已提交
6214
  for (int32_t i = 0; i < numOfOutput; ++i) {
6215 6216
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6217

6218
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6219
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6220 6221 6222 6223 6224 6225 6226 6227 6228
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6229 6230 6231
    }
  }

6232
  *pExprInfo = pExprs;
6233 6234 6235
  return TSDB_CODE_SUCCESS;
}

6236
/*
 * Create the group-by descriptor for a query from the already decoded group-by columns.
 *
 * pQueryMsg : query message; numOfGroupCols, orderType and orderByIdx are read
 * pColIndex : array of pQueryMsg->numOfGroupCols column indexes, copied into the result
 * code      : [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails; left untouched otherwise
 *
 * Returns the new descriptor, or NULL when the query has no group-by columns or on allocation failure.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure to the caller instead
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6260
/*
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo per column of pQuery->colList that
 * carries at least one filter, with a filter callback resolved for every filter element.
 *
 * pQInfo : only used as an identifier in log messages
 * pQuery : colList/numOfCols are read; numOfFilterCols and pFilterInfo are written
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG. Partially
 * built filter information is left attached to pQuery when an error is returned.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array so that nobody iterates over NULL
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];
    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // both bounds present: pick the range callback matching the open/closed combination
        if (rangeFilterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          pSingleColFilter->fp = (upper == TSDB_RELATION_LESS_EQUAL)? rangeFilterArray[4] : rangeFilterArray[2];
        } else {
          pSingleColFilter->fp = (upper == TSDB_RELATION_LESS_EQUAL)? rangeFilterArray[3] : rangeFilterArray[1];
        }
      } else {  // set callback filter function
        if (filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6353
/*
 * Rewrite colInfo.colIndex of every first-stage expression so that it is the position of the
 * referenced column inside pQuery->colList (normal columns) or pQuery->tagColList (tag columns).
 * Arithmetic expressions and user-defined constant columns are left untouched.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the query object itself before looking at its members
  assert(pQuery != NULL && pQuery->pExpr1 != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pExpr1[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column must always be found in the source column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is the only tag-like column allowed to be missing from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6390 6391
// forward declaration: freeQInfo() is defined further down in this file, but createQInfoImpl() and initQInfo() call it on their error paths
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6392 6393 6394
/*
 * Decide how many rows the result buffer of a query can hold (rec.capacity) and at which fill
 * level it is considered full enough (rec.threshold, 85% of the capacity).
 * Projection queries get a buffer derived from the row size with a lower bound of 8192 rows,
 * every other query uses a fixed 4096 rows.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  int32_t numOfRows = 4096;  // in case of non-prj query, a smaller output buffer will be used.

  if (isProjQuery(pQuery)) {
    numOfRows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    numOfRows = (numOfRows < RESULT_MSG_MIN_ROWS)? RESULT_MSG_MIN_ROWS : numOfRows;
  }

  pQuery->rec.capacity  = numOfRows;
  pQuery->rec.threshold = (int32_t)(numOfRows * RESULT_THRESHOLD_RATIO);
}

6411
/*
 * Allocate and populate a SQInfo (and its SQuery) from a decoded query message.
 *
 * pQueryMsg       : decoded query message
 * pGroupbyExpr    : group-by descriptor, may be NULL
 * pExprs          : first-stage output expressions, pQueryMsg->numOfOutput entries
 * pSecExprs       : second-stage output expressions, may be NULL
 * pTableGroupInfo : groups of tables to query; copied by value into the new SQInfo
 * pTagCols        : tag column descriptors
 * stableQuery     : forwarded to changeExecuteScanOrder()
 *
 * Returns the new SQInfo, or NULL on failure. On failure the resources already attached to the
 * SQInfo/SQuery are released through freeQInfo(); when the failure happens before the SQuery
 * exists, pGroupbyExpr, pTagCols and pExprs are released here.
 * NOTE(review): pSecExprs is not released on those two early paths - confirm who owns it then.
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pExpr1          = pExprs;
  pQuery->pExpr2          = pSecExprs;
  pQuery->numOfExpr2      = pQueryMsg->secondStageOutput;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // colList holds SColumnInfo elements, so size the array by that type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the filters are cloned so that the query does not alias the message buffer
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    // TODO refactor
    int16_t bytes = 0;
    if (pQuery->pExpr2 == NULL || col >= pQuery->numOfExpr2) {
      // no second-stage expression for this column: valid indexes of pExpr2 are [0, numOfExpr2)
      bytes = pExprs[col].bytes;
    } else {
      bytes = MAX(pQuery->pExpr2[col].bytes, pExprs[col].bytes);
    }

    size_t size = (size_t)((pQuery->rec.capacity + 1) * bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);

    // the per-group arrays are pushed into this list below, so it must exist when there are groups
    if (numOfGroups > 0 && pQInfo->tableqinfoGroupInfo.pGroupList == NULL) {
      goto _cleanup;
    }
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  pQInfo->runtimeEnv.pool = initResultRowPool(getWindowResultSize(&pQInfo->runtimeEnv));

  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL || pQInfo->runtimeEnv.keyBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  // running index of the next free STableQueryInfo slot inside pQInfo->pBuf
  int32_t index = 0;

  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // every table starts scanning from its own last key
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // the SQInfo was never created, so the table group was not taken over by it
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // the SQuery was never created, so these inputs are not reachable through freeQInfo()
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6604
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6605 6606 6607 6608
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6609

H
hjxilinx 已提交
6610 6611 6612 6613
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6614
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6615 6616 6617
  return (sig == (uint64_t)pQInfo);
}

6618
/*
 * Finish the initialization of a SQInfo created by createQInfoImpl().
 *
 * pQueryMsg : query message; the ts-comp block description (tsLen, tsOffset, ...) is read
 * tsdb      : storage handle, used to obtain the time precision and passed to doInitQInfo()
 * vgId      : vnode group id, passed through to the helpers
 * pQInfo    : the query info to initialize
 * isSTable  : passed through to doInitQInfo()
 *
 * Returns TSDB_CODE_SUCCESS when the query is ready or already completed (empty time range, no
 * qualified table). When doInitQInfo() fails, its code is returned and pQInfo has been released
 * with freeQInfo(), so the caller must not touch it afterwards.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is only needed by doInitQInfo(), so it is created after the early returns above;
  // creating it earlier left it unreferenced whenever one of them was taken.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);

    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6663
/*
 * Release an array of numOfFilters column filters together with the string owned by each filter
 * whose filterstr flag is set. Nothing is released when pFilter is NULL or numOfFilters is 0.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    int32_t k = 0;
    while (k < numOfFilters) {
      SColumnFilterInfo* pItem = &pFilter[k++];

      // only string filters own the buffer referenced by pz
      if (pItem->filterstr) {
        free((void*)(pItem->pz));
      }
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6677 6678
/*
 * Destroy every STableQueryInfo referenced from the group list, the per-group arrays, the group
 * list itself and the lookup map, then reset the descriptor to an empty state.
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  if (pTableqinfoGroupInfo->pGroupList != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pTableqinfoGroupInfo->pGroupList);

    int32_t g = 0;
    while (g < groupCount) {
      SArray *pTables = taosArrayGetP(pTableqinfoGroupInfo->pGroupList, g);

      size_t tableCount = taosArrayGetSize(pTables);
      for (int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pTables, t);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pTables);
      g += 1;
    }
  }

  // the containers are released even when the group list was never created
  taosArrayDestroy(pTableqinfoGroupInfo->pGroupList);
  pTableqinfoGroupInfo->pGroupList = NULL;

  taosHashCleanup(pTableqinfoGroupInfo->map);
  pTableqinfoGroupInfo->map = NULL;

  pTableqinfoGroupInfo->numOfTables = 0;
}

H
Haojun Liao 已提交
6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716
/*
 * Release an array of numOfExpr expression descriptors and the expression node attached to each
 * of them. A NULL array is accepted as long as numOfExpr is 0.
 * Always returns NULL so that the caller can reset its pointer in the same statement.
 */
static void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr) {
  if (pExprInfo != NULL) {
    int32_t idx = 0;
    while (idx < numOfExpr) {
      tExprNode* pNode = pExprInfo[idx].pExpr;
      if (pNode != NULL) {
        tExprNodeDestroy(pNode, NULL);
      }

      idx += 1;
    }

    tfree(pExprInfo);
  } else {
    assert(numOfExpr == 0);
  }

  return NULL;
}

H
hjxilinx 已提交
6717 6718 6719 6720
/*
 * Release a SQInfo and everything reachable from it: the runtime environment, the SQuery with its
 * result buffers, filters, expressions and column lists, the table query information and the
 * table group. Does nothing when pQInfo is NULL or its signature does not match (see isValidQInfo).
 * The function is safe to call on a partially constructed SQInfo.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        tfree(pQuery->sdata[col]);
      }
      tfree(pQuery->sdata);
    }

    tfree(pQuery->fillVal);

    // numOfFilterCols may already be counted while the array allocation failed, so check the array
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          tfree(pColFilter->pFilters);
        }
      }
    }

    pQuery->pExpr1 = destroyQueryFuncExpr(pQuery->pExpr1, pQuery->numOfOutput);
    pQuery->pExpr2 = destroyQueryFuncExpr(pQuery->pExpr2, pQuery->numOfExpr2);

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      tfree(pQuery->colList);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the signature so that a stale pointer no longer passes isValidQInfo()
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
6783
/*
 * Compute the number of payload bytes needed to return the current result set.
 *
 * For a ts-comp query that produced rows, the payload is the content of the
 * temporary file whose path is stored in pQuery->sdata[0]->data: *numOfRows is
 * overwritten with the file size and that size is returned (0 if the file
 * cannot be stat'ed). For every other query the size is rowSize * (*numOfRows).
 *
 * TODO handle the case that the file is too large to be sent back in one response
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // ordinary query (or ts-comp query without any rows): plain row-based size
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  // for ts-comp query the "row count" reported to the caller is the file size
  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6804

H
hjxilinx 已提交
6805 6806 6807
/*
 * Copy the current result set of the query into the response buffer `data`.
 *
 * - ts-comp query: the result lives in a temporary file (path kept in
 *   pQuery->sdata[0]->data). The whole file is read into `data`, then the file
 *   is closed and unlinked.
 * - any other query: the in-memory result rows are copied by
 *   doCopyQueryResultToMsg().
 *
 * Afterwards rec.total is advanced by rec.rows and the query is marked
 * QUERY_OVER once the LIMIT has been reached.
 *
 * The caller is expected to have sized `data` for the file content (see
 * getResultSize()).
 *
 * Always returns TSDB_CODE_SUCCESS; I/O failures are logged only.
 * todo: return the error code to client
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    const char *path = pQuery->sdata[0]->data;
    int32_t     fd = open(path, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the size signed so that a failing lseek() (-1) can be detected
      int64_t fileSize = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, path, fileSize);
      if (fileSize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to seek tmp file of ts-comp data, path:%s, reason:%s", pQInfo, path, strerror(errno));
      } else {
        // read() may legally return fewer bytes than requested, keep reading until the whole file is copied
        int64_t copied = 0;
        while (copied < fileSize) {
          int64_t n = (int64_t)read(fd, data + copied, (size_t)(fileSize - copied));
          if (n > 0) {
            copied += n;
            continue;
          }

          if (n < 0 && errno == EINTR) {  // interrupted before any byte was read, retry
            continue;
          }

          if (n == 0) {
            qError("QInfo:%p unexpected end of tmp file of ts-comp data, path:%s, read:%"PRId64", expected:%"PRId64,
                   pQInfo, path, copied, fileSize);
          } else {
            qError("QInfo:%p failed to read tmp file of ts-comp data, path:%s, reason:%s", pQInfo, path, strerror(errno));
          }
          break;
        }
      }

      close(fd);
      unlink(path);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6857 6858 6859 6860 6861 6862 6863
// Registry of the query handles of one vgroup; one instance is created by each qOpenQueryMgmt(vgId) call.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // id passed to qOpenQueryMgmt(), used in log messages
  bool            closed;         // set by qQueryMgmtNotifyClosed(); register/acquire fail once it is true
  pthread_mutex_t lock;           // initialized in qOpenQueryMgmt(); NOTE(review): the lock/unlock calls around `closed` are commented out in this file
} SQueryMgmt;

6864
/*
 * Build a query handle (SQInfo) from a query message received from the client.
 *
 * Steps: decode the message, build the first/second stage expressions and the
 * group-by expression, resolve the table group to be queried from tsdb, reserve
 * query buffer for that many tables, then create and initialize the SQInfo.
 *
 * @param tsdb       tsdb repository handle, must not be NULL
 * @param vgId       id of the vgroup the query runs in
 * @param pQueryMsg  query message, must not be NULL
 * @param pQInfo     [out] created query handle; set to NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 *
 * Ownership: pExprs, pSecExprs, pGroupbyExpr and pTagColumnInfo are handed over
 * to createQInfoImpl(); everything still owned by this function is released at
 * the _over label.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond      = NULL;
  char            *tbnameCond   = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg     = NULL;
  SSqlFuncMsg    **pSecExprMsg  = NULL;
  SExprInfo       *pExprs       = NULL;
  SExprInfo       *pSecExprs    = NULL;

  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  // declared before the first goto so that no jump skips an initializer
  bool            isSTableQuery  = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t         st             = 0;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &pSecExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->numOfOutput, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pSecExprMsg != NULL) {
    if ((code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &pSecExprs, pSecExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    // the query type comes from the message: reject it instead of relying on an assert that vanishes in release builds
    qError("qmsg:%p unsupported query type:%d", pQueryMsg, (int32_t)pQueryMsg->queryType);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    // the table group has not been handed over to a SQInfo yet, release it here
    tsdbDestroyTableGroup(&tableGroupInfo);
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, pSecExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);

  // these objects were passed to createQInfoImpl() above and must not be released at _over
  pExprs = NULL;
  pSecExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);
  free(pExprs);
  free(pSecExprs);

  free(pExprMsg);
  free(pSecExprMsg);

  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // pQInfo is already freed in initQInfo on failure, but *pQInfo may still hold the stale pointer
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
7012
/*
 * Print the cost summary of a query and release its handle.
 * Handles rejected by isValidQInfo() are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pInfo)) {
    qDebug("QInfo:%p query completed", pInfo);

    // print the query cost summary before the handle goes away
    queryCostStatis(pInfo);
    freeQInfo(pInfo);
  }
}

7023 7024 7025 7026 7027 7028 7029 7030
/*
 * Mark the result of the current execution round as ready and give up the
 * ownership of the handle.
 *
 * @return true if a retrieve request is already waiting (rspContext is set),
 *         i.e. the caller has to build the response for it.
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasPendingRetrieve = (pQInfo->rspContext != NULL);

  // The owner must be cleared inside the critical section: another thread may
  // run ahead of the current one once the handle is put into a task again.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  tsem_post(&pQInfo->ready);
  return hasPendingRetrieve;
}

7042
/*
 * Execute one round of the query behind `qinfo` in the calling thread.
 *
 * The calling thread first has to become the owner of the handle; if another
 * thread owns it, the code is set to TSDB_CODE_QRY_IN_EXEC and false is
 * returned. Errors raised by the executor arrive through longjmp() to the
 * setjmp() point below and are recorded in pQInfo->code.
 *
 * @return the value of doBuildResCheck(): true if a response has to be built
 *         for a waiting retrieve request; false otherwise.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  // try to take the ownership of the handle
  int64_t self      = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t jmpCode = setjmp(pRuntimeEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // pick the executor that matches the kind of query
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

7098
/*
 * Check whether the result of the query is available for a retrieve request.
 *
 * @param qinfo        query handle
 * @param buildRes     [out] true if the caller should build the response now
 * @param pRspContext  context of the retrieve request; only used when
 *                     _NON_BLOCKING_RETRIEVE is enabled, where it is stored in
 *                     the handle if the result is not ready yet
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the
 *         code currently recorded in the handle
 *
 * Without _NON_BLOCKING_RETRIEVE the call blocks on the `ready` semaphore
 * until the executing thread has posted it.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  int32_t code = TSDB_CODE_SUCCESS;

#if _NON_BLOCKING_RETRIEVE
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);
  assert(pQInfo->rspContext == NULL);

  bool ready = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = ready;

  if (ready) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    // result not available yet: park the request context in the handle
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  }

  code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);
#else
  // blocking mode: wait until the executing thread signals the result
  tsem_wait(&pQInfo->ready);
  *buildRes = true;
  code = pQInfo->code;
#endif

  return code;
}
7141

7142
/*
 * Allocate the retrieve response and fill it with the current result set.
 *
 * @param qinfo         query handle
 * @param pRsp          [out] response allocated by rpcMallocCont(); NULL if the
 *                      allocation failed
 * @param contLen       [out] total length of the response in bytes
 * @param continueExec  [out] true if the query has more results to produce
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or the
 *         code recorded in the handle
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  // result rows + one int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *rsp = *pRsp;
  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query, the elapsed time always
  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    rsp->offset = 0;
  } else {
    rsp->offset = htobe64(pQuery->limit.offset);
  }
  rsp->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);

  rsp->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    // here current thread hold the refcount, so it is safe to free tsdbQueryHandle.
    *continueExec = false;
    rsp->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
7196

7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207
/*
 * Tell whether the query has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1 if
 *         the query was killed or its status contains QUERY_OVER, else 0.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;
  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pInfo->runtimeEnv.pQuery->status, QUERY_OVER) ? 1 : 0;
}

H
Haojun Liao 已提交
7208
/*
 * Mark the query as killed and wait until no thread owns the handle any more.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise
 *         TSDB_CODE_SUCCESS once the owner field has become 0.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;
  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pInfo);

  // Wait for the query executing thread to stop.
  // Once the query is stopped, the owner of the qHandle is cleared immediately.
  for (; pInfo->owner != 0; taosMsleep(100)) {
    // poll every 100ms
  }

  return TSDB_CODE_SUCCESS;
}

7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241
/*
 * Write one tag value into the result buffer.
 *
 * @param output  destination inside the result buffer
 * @param val     source value, NULL means the tag has no value
 * @param type    data type of the tag
 * @param bytes   width of the tag for fixed-length types
 *
 * BINARY/NCHAR values are copied with their length header (varDataTLen);
 * all other types are copied as `bytes` raw bytes. A NULL value is stored as
 * the NULL representation of the type.
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7242 7243 7244
/*
 * Produce the result of a query that only touches tags / table names, i.e. no
 * time-series data has to be scanned.
 *
 * Three kinds of query are handled, selected by the function id of the first
 * output expression:
 *  - TSDB_FUNC_TID_TAG: one var-length record per table containing
 *    {int16 0, uid, tid, vgId, tag value or table name};
 *  - TSDB_FUNC_COUNT:   "count(tbname)", a single row holding the number of tables;
 *  - anything else:     the requested tag values / table names per table,
 *    honoring LIMIT and OFFSET.
 *
 * pQInfo->tableIndex remembers how many tables have been consumed, so the
 * function can be called repeatedly to page through the tables. On return
 * rec.rows holds the number of generated rows and the status is QUERY_COMPLETED.
 * Nothing is done if there is no table group.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pExpr1[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pExpr1[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    // use the schema of the tag column if the expression refers to one
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The fields below are packed back to back, so `output` is generally not
      // aligned for the field type; serialize through memcpy instead of storing
      // through a casted pointer.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // never return more rows than the LIMIT, nor than the result buffer can hold
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pExpr1;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;

        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7375
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7376 7377 7378 7379
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7380
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7381 7382
}

H
Haojun Liao 已提交
7383
/*
 * Reserve query buffer for a query over `numOfTables` tables.
 *
 * tsQueryBufferSize holds the remaining budget:
 *   < 0  the budget is unlimited, every request succeeds;
 *   == 0 query processing is disabled, every request fails;
 *   > 0  the estimated need is subtracted atomically if it fits.
 *
 * A reservation is only granted if it leaves a strictly positive remainder.
 * Draining the budget to exactly 0 would make it indistinguishable from the
 * "disabled" setting: releaseQueryBuf() ignores a non-positive budget, so the
 * reserved amount would never be given back and all later queries would be
 * rejected.
 *
 * @return TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t t = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  if (tsQueryBufferSize == 0) {
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  while (1) {
    int64_t s = tsQueryBufferSize;
    int64_t remain = s - t;
    if (remain <= 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry if another thread changed the budget in the meantime
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, s, remain) == s) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

H
Haojun Liao 已提交
7406
/*
 * Give back the query buffer estimated for a query over `numOfTables` tables.
 * Nothing is done if tsQueryBufferSize is not positive.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize > 0) {
    // add the estimated consumption of the query back to the available budget
    int64_t reserved = getQuerySupportBufSize(numOfTables);
    atomic_add_fetch_64(&tsQueryBufferSize, reserved);
  }
}

7417 7418 7419 7420 7421 7422 7423
/*
 * Return the response context currently stored in the handle (rspContext).
 * The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* pendingRsp = pInfo->rspContext;
  return pendingRsp;
}

7424 7425 7426 7427 7428 7429 7430
/*
 * Release callback passed to taosCacheInit() for the query handle pool.
 * `qhandle` points to the stored query handle; the query is killed first and
 * its handle is destroyed afterwards. NULL entries are ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** ppQInfo = qhandle;

  if (ppQInfo != NULL && *ppQInfo != NULL) {
    qKillQuery(*ppQInfo);
    qDestroyQueryInfo(*ppQInfo);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7435
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7436 7437 7438 7439

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7440
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7441 7442 7443 7444
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7445

S
TD-1530  
Shengliang Guan 已提交
7446
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7447 7448 7449 7450
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7451 7452

  qDebug("vgId:%d, open querymgmt success", vgId);
7453
  return pQueryMgmt;
7454 7455
}

H
Haojun Liao 已提交
7456
/*
 * Callback passed to taosCacheRefresh(): `handle` points to a stored query
 * handle, and that query is killed.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void* qinfo = *(void**)handle;
  qKillQuery(qinfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7462 7463 7464 7465 7466 7467 7468
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7469
//  pthread_mutex_lock(&pQueryMgmt->lock);
7470
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7471
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7472

H
Haojun Liao 已提交
7473
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490
}

/*
 * Destroy a query manager: clean up the handle pool, destroy the mutex and
 * free the manager itself. The manager must have been closed before
 * (qQueryMgmtNotifyClosed). A NULL manager is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  assert(pQueryMgmt->closed);

  // keep the id for the final log message, the manager is freed before it is printed
  const int32_t vgId = pQueryMgmt->vgId;

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  tfree(pQueryMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}

7496
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7497
  if (pMgmt == NULL) {
7498
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7499 7500 7501
    return NULL;
  }

7502
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7503

7504 7505
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7506
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7507
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7508 7509 7510
    return NULL;
  }

H
Haojun Liao 已提交
7511
//  pthread_mutex_lock(&pQueryMgmt->lock);
7512
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7513
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7514
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7515
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7516 7517
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7518 7519
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7520
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7521 7522 7523 7524 7525

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7526
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7527 7528
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7529 7530 7531 7532 7533 7534 7535
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7536 7537 7538
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7539 7540
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7541
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7542
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7543 7544 7545 7546 7547 7548
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7549
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7550 7551 7552 7553 7554
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7555
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7556 7557 7558
  return 0;
}

7559