qExecutor.c 237.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise, the program
 * is forced to load the primary column explicitly.
 */
37
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
38 39
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

40
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
41
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
42
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
43
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
44

H
Haojun Liao 已提交
45
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
46

47
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
48
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
49

H
Haojun Liao 已提交
50 51
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey members of the time window _src into _dst.
 * Both arguments are struct lvalues (not pointers); each is evaluated twice.
 * The arguments are parenthesized so that expressions such as *pWin expand
 * correctly.
 * NOTE(review): the trailing ';' after "while (0)" is kept for compatibility
 * with call sites outside this view; it makes the macro unsafe as the
 * unbraced body of an if that is followed by an else.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0);

57
/* Query status bit flags. */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = (1u << 0),

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = (1u << 1),

  /*
   * The query is over:
   * 1. used in the one-row-result query process, e.g., count/sum/first/last/avg, etc.
   * 2. also set when all data within the queried time window has been processed.
   */
  QUERY_COMPLETED = (1u << 2),

  /*
   * The result has not been completely returned to the client; usually used
   * for interval queries with the interpolation option.
   */
  QUERY_OVER = (1u << 3),
};
77 78

/* Outcome codes of a timestamp join comparison. */
enum {
  TS_JOIN_TS_EQUAL = 0,
  TS_JOIN_TS_NOT_EQUALS,   // == 1
  TS_JOIN_TAG_NOT_EQUALS,  // == 2
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Allocator wrappers that return NULL for some rand() values instead of
 * allocating. The whole section is compiled out by the surrounding "#if 0".
 * When enabled, the #defines below redirect every later malloc/calloc/realloc
 * call in this file to these wrappers.
 */

// malloc wrapper: returns NULL when rand() is a multiple of 1000
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : malloc(__size);
}

// calloc wrapper: returns NULL when rand() is a multiple of 1000
static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : calloc(num, __size);
}

// realloc wrapper: returns NULL when rand() % 5 is 0 or 1
static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();
  return (v % 5 < 2) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Advance *tw to the next time window in the scan direction of the query.
 *
 * For interval units other than 'n' and 'y', the start key moves by one sliding
 * step (negated for a descending scan) and the end key is skey + interval - 1.
 *
 * For 'n' and 'y' the window is moved by `interval` months ('y' is converted to
 * 12 months per unit): the start key is converted to seconds, broken down with
 * localtime_r(), shifted by whole months and rebuilt with mktime(). The end key
 * is the start of the following window minus one tick.
 *
 * @param pQuery  query descriptor; order, interval and precision are read
 * @param tw      in/out: current window on entry, next window on return
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to seconds (one extra division for microsecond precision)
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before multiplying: time_t and long may both be 32 bits wide
  tw->skey = (int64_t)mktime(&tm) * 1000;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  tw->ekey -= 1;
}

#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))
H
Haojun Liao 已提交
174

175 176
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

H
hjxilinx 已提交
181
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
182
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
183 184
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186 187 188
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
197

198
/*
 * Evaluate all column filters of the query against the row at elemPos.
 *
 * The filter elements of one column are alternatives: the column qualifies as
 * soon as one of them accepts the value. All filtered columns must qualify.
 *
 * @param pQuery   query descriptor holding numOfFilterCols / pFilterInfo
 * @param elemPos  row index into each filter column's pData buffer
 * @return true if every filtered column qualifies, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pInfo = &pQuery->pFilterInfo[col];
    char *pVal = (char*)pInfo->pData + pInfo->info.bytes * elemPos;

    bool accepted = false;
    for (int32_t f = 0; f < pInfo->numOfFilters; ++f) {
      SColumnFilterElem *pElemFilter = &pInfo->pFilters[f];

      if (isNull(pVal, pInfo->info.type)) {
        // a NULL value is only accepted by the is-null filter
        if (pElemFilter->fp == isNull_filter) {
          accepted = true;
          break;
        }
        continue;
      }

      // non-NULL value: not-null always accepts, is-null never does
      if (pElemFilter->fp == notNull_filter) {
        accepted = true;
        break;
      }
      if (pElemFilter->fp == isNull_filter) {
        continue;
      }

      if (pElemFilter->fp(pElemFilter, pVal, pVal)) {
        accepted = true;
        break;
      }
    }

    if (!accepted) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes among the output columns' result infos.
 *
 * When the query has a main output (see hasMainOutput()), the ts, tag and
 * tagprj functions are ignored, since they cannot decide the number of output
 * rows; the main output decides it.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    bool auxiliary =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (hasMainFunction && auxiliary) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo == NULL) {
      continue;
    }

    if (maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

266 267 268 269 270
/*
 * The number of results needs to be updated because the offset value was updated.
 * Overwrites numOfRes of every output column except the ts, tag, tagprj and
 * ts_dummy functions; asserts that the previous value is larger than numOfRes.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    bool    skipped = (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) ||
                      (functionId == TSDB_FUNC_TAGPRJ) || (functionId == TSDB_FUNC_TS_DUMMY);

    if (!skipped) {
      assert(pResInfo->numOfRes > numOfRes);
      pResInfo->numOfRes = numOfRes;
    }
  }
}

H
Haojun Liao 已提交
286
// Map a group index to a result id: 20000000 + groupIndex * 10000.
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t base = 20000000;
  const int32_t stride = 10000;

  return base + (groupIndex * stride);
}

/*
 * Return true if the group-by expression contains a normal column.
 * A NULL expression or one with no group columns yields false.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (!TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pColIndex->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression,
 * looked up by column id in the query's column list.
 * Returns TSDB_DATA_TYPE_NULL when no matching column is found.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 stays in place when the group-by clause has no normal column
  int32_t colId = -2;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      return pQuery->colList[i].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * Return true if the query outputs at least one tag_dummy/ts_dummy column
 * together with at least one function flagged TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;

    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
    } else if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  return (numOfSelectivity > 0 && hasTags);
}

358 359 360 361 362 363 364 365 366 367 368
// Return true if every output column is produced by the prj or tagprj function.
bool isProjQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    bool    isProjection = (functId == TSDB_FUNC_PRJ || functId == TSDB_FUNC_TAGPRJ);
    if (!isProjection) {
      return false;
    }

    ++i;
  }

  return true;
}

369
// Return true if the first output column is produced by the ts_comp function.
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
370

371 372 373
/*
 * Apply the LIMIT clause to the rows produced in the current round.
 *
 * If a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is
 * trimmed to the remaining quota and the query is marked QUERY_COMPLETED.
 *
 * @return true if the rows were trimmed, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool exceeded = (pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit);
  if (!exceeded) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

// Return true if any output column other than ts is produced by the top or bottom function.
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;

    if (functionId != TSDB_FUNC_TS && (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
/*
 * Return true if the query needs tag values in its output: either it is a
 * single-column ts_comp query (the ts_comp column requires the tag value for
 * the join filter), or at least one output column refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // look for a tag value, by which the results are aggregated
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SExprInfo *pLocalExprInfo = &pQuery->pSelectExpr[idx];
    if (TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
      return true;
    }

    ++idx;
  }

  return false;
}

421 422 423 424 425 426 427 428
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
429
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
430
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
431 432
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
433 434
  } else {
    *pColStatis = NULL;
435
  }
436

H
Haojun Liao 已提交
437
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
438 439 440
    return false;
  }

441 442 443
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
444

445 446 447 448
  return true;
}

/*
 * Locate, or create, the window result slot that belongs to the given key.
 *
 * The key is looked up in pWindowResInfo->hashList. On a hit, curIndex is set to
 * the stored slot. On a miss during the master scan, a new slot is appended
 * (growing the pResult array when it is full) and registered in the hash table.
 *
 * @param pRuntimeEnv     runtime environment (statistics, jump buffer, buffer sizes)
 * @param pWindowResInfo  window result container to search/extend
 * @param pData           key bytes
 * @param bytes           key length in bytes
 * @param masterscan      only the master scan is allowed to add new windows
 * @return the window result at curIndex, or NULL if the key is unknown and this
 *         is not the master scan
 *
 * Does not return on failure: longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW.
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // the truncated growth factor yields no growth for a capacity of 0 or 1;
      // always add at least one slot so that the append below stays in bounds
      if (newCap <= pWindowResInfo->capacity) {
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCap * sizeof(SWindowResult)));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // account for the new slots only after the reallocation succeeded
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCap - pWindowResInfo->capacity);

      pWindowResInfo->pResult = (SWindowResult *)t;

      // zero the newly added tail before initializing each slot
      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The candidate window is the one derived from prevSKey (when no window is
 * active yet, i.e. curIndex == -1) or the currently active window result. If ts
 * lies outside the candidate, the window is recomputed: by truncating ts for
 * month ('n') / year ('y') intervals, or by shifting the start key by whole
 * sliding steps otherwise.
 *
 * The end key is inclusive in every branch: start of the next window minus one.
 *
 * @param pWindowResInfo  window result container (curIndex, prevSKey)
 * @param ts              timestamp being processed
 * @param pQuery          query descriptor (interval, precision, window, order)
 * @return the time window covering ts
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // taosTimeAdd() yields the start of the next window; ekey is the tick before it
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SWindowResult* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // move the start key backward by whole sliding steps until it is <= ts
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // move the start key forward by whole sliding steps until the window reaches ts
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

/*
 * Assign a row in the disk-based result buffer to the window result, if it
 * does not own one yet (pos.pageId == -1).
 *
 * The last page registered for sid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when sid has no page yet) a new page is
 * requested from the result buffer.
 *
 * @param pWindowRes        window result whose pos (pageId/rowId) is filled in
 * @param pResultBuf        disk-based result buffer
 * @param sid               id used to group the pages in the result buffer
 * @param numOfRowsPerPage  capacity of one page in rows
 * @return 0 on success or when a position is already assigned, -1 if no page
 *         could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    // guard the dereference: a missing page is reported by the NULL check below
    if (pData != NULL && pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // take the next free row of the page; pageId is still -1 here (checked on entry)
  pWindowRes->pos.pageId = pageId;
  pWindowRes->pos.rowId = (int32_t)(pData->num++);

  assert(pWindowRes->pos.pageId >= 0);
  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
602
                                       STimeWindow *win, bool masterscan, bool* newWind) {
603 604
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
605

606 607
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
608
  if (pWindowRes == NULL) {
609 610 611
    *newWind = false;

    return masterscan? -1:0;
612
  }
613

614
  *newWind = true;
H
Haojun Liao 已提交
615

616 617 618
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
619
    if (ret != TSDB_CODE_SUCCESS) {
620 621 622
      return -1;
    }
  }
623

624
  // set time window for current result
625
  pWindowRes->win = (*win);
626

H
Haojun Liao 已提交
627
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
628 629 630
  return TSDB_CODE_SUCCESS;
}

631
// Return the closed flag of the window result stored in the given slot.
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pResult = &pWindowResInfo->pResult[slot];
  return pResult->closed;
}

H
Haojun Liao 已提交
636
/*
 * Count the rows to move forward from pos, in scan order, up to the key ekey.
 *
 * Ascending order searches pData[pos .. numOfRows-1]; descending order searches
 * pData[0 .. pos]. The row found by searchFn is included in the count only if
 * its timestamp equals ekey. Asserts that the result is positive.
 *
 * @param numOfRows  number of rows in pData
 * @param searchFn   block search function
 * @param ekey       end key of the time window
 * @param pos        start position in pData
 * @param order      TSDB_ORDER_ASC or descending otherwise
 * @param pData      timestamp column of the block
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;
  bool    ascending = (order == TSDB_ORDER_ASC);

  int32_t end = ascending ? searchFn((char*) &pData[pos], numOfRows - pos, ekey, order)
                          : searchFn((char *)pData, pos + 1, ekey, order);

  if (end >= 0) {
    // for the ascending search, end is relative to pos
    int32_t hit = ascending ? (end + pos) : end;

    forwardStep = ascending ? end : (pos - end);
    if (pData[hit] == ekey) {
      forwardStep += 1;
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
667
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
668
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
669
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
670
    return pWindowResInfo->size;
671
  }
672

673
  // no qualified results exist, abort check
674
  int32_t numOfClosed = 0;
675

676
  if (pWindowResInfo->size == 0) {
677
    return pWindowResInfo->size;
678
  }
679

680
  // query completed
H
hjxilinx 已提交
681 682
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
683
    closeAllTimeWindow(pWindowResInfo);
684

685 686 687 688
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
689
    int64_t skey = TSKEY_INITIAL_VAL;
690

691 692
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
693
      if (pResult->closed) {
694
        numOfClosed += 1;
695 696
        continue;
      }
697

698
      TSKEY ekey = pResult->win.ekey;
699
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
700
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
701 702
        closeTimeWindow(pWindowResInfo, i);
      } else {
703
        skey = pResult->win.skey;
704 705 706
        break;
      }
    }
707

708
    // all windows are closed, set the last one to be the skey
709
    if (skey == TSKEY_INITIAL_VAL) {
710 711 712 713 714
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
715

716
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].win.skey;
717

718 719
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
720
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
721
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
722

723
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
724
    } else {
725
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
726
             numOfClosed);
727 728
    }
  }
729

730 731 732 733 734
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
735

736
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
737
  return numOfClosed;
738 739 740
}

/*
 * Compute the number of rows of the data block, starting at startPos and moving
 * in scan order, that belong to the time window ending at ekey.
 *
 * If the window ends inside the block, the count comes from
 * getForwardStepsInBlock(); otherwise all remaining rows of the block in scan
 * direction are taken. When updateLastKey is set, the lastKey of the current
 * table is moved one step past the last row taken (or past the block border).
 *
 * @return number of rows (asserted to be positive)
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num   = -1;
  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  bool ascending = QUERY_IS_ASC_QUERY(pQuery);
  bool endsInBlock = ascending ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  if (endsInBlock) {
    num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    if (updateLastKey) {  // update the last key
      int32_t lastPos = ascending ? (startPos + (num - 1)) : (startPos - (num - 1));
      item->lastKey = pPrimaryColumn[lastPos] + step;
    }
  } else {
    // the window covers the rest of the block in scan direction
    if (ascending) {
      num = pDataBlockInfo->rows - startPos;
    } else {
      num = startPos + 1;
    }

    if (updateLastKey) {
      TSKEY border = ascending ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
      item->lastKey = border + step;
    }
  }

  assert(num > 0);
  return num;
}

H
Haojun Liao 已提交
780 781
/*
 * Apply every output function block-wise to forwardStep rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * @param pRuntimeEnv  runtime environment (query, function contexts)
 * @param closed       whether the time window is closed
 * @param pWin         time window; its skey becomes nStartQueryTimestamp
 * @param offset       position of the first row in scan order
 * @param forwardStep  number of rows to process
 * @param tsCol        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // remembered from the first context; written back to every context below
  bool hasPrev = pCtx[0].preAggVals.isSet;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOne = &pCtx[k];

    pOne->nStartQueryTimestamp = pWin->skey;
    pOne->size = forwardStep;

    // for a descending scan the rows end at offset, so the range starts forwardStep - 1 rows earlier
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pOne->startOffset = offset;
    } else {
      pOne->startOffset = offset - (forwardStep - 1);
    }

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pOne->ptsList = &tsCol[pOne->startOffset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    // NOTE: the original value of isSet have been changed here
    if (pOne->preAggVals.isSet && forwardStep < numOfTotal) {
      pOne->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pOne, functionId)) {
      aAggs[functionId].xFunction(pOne);
    }

    // restore it
    pOne->preAggVals.isSet = hasPrev;
  }
}

814
/*
 * Apply every output function row-wise to the row at the given offset.
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * @param pRuntimeEnv  runtime environment (query, function contexts)
 * @param closed       whether the time window is closed
 * @param pWin         time window; its skey becomes nStartQueryTimestamp
 * @param offset       row position passed to each function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOne = &pCtx[k];
    pOne->nStartQueryTimestamp = pWin->skey;

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pOne, functionId)) {
      aAggs[functionId].xFunctionF(pOne, offset);
    }
  }
}

H
Haojun Liao 已提交
830 831
/*
 * Move *pNext forward to the following time window and find the row of the current block at which
 * that window starts.
 *
 * Returns -1 if the advanced window lies completely beyond the block in the scan direction.
 * Otherwise returns the start row position; if the row at that position is outside the advanced
 * window (the window covers no data), *pNext is moved further so that it reaches that row.
 *
 * NOTE(review): primaryKeys is dereferenced unconditionally below -- confirm that callers never
 * reach this point with a NULL timestamp column.
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the next time window is not in the current block
  bool beyondBlock = ascending ? (pNext->skey > pDataBlockInfo->window.ekey)
                               : (pNext->ekey < pDataBlockInfo->window.skey);
  if (beyondBlock) {
    return -1;
  }

  // the search key is the window edge facing the scan direction, clamped by the query start key
  TSKEY startKey = -1;
  if (ascending) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  // tumbling time window query (sliding == interval) is a special case of the sliding window query:
  // the next window starts right after the last row of the previous one, no search is required
  int32_t startPos = 0;
  bool    tumbling = (pQuery->interval.sliding == pQuery->interval.interval);
  if (tumbling && prevPosition != -1) {
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * The window may not cover any data at all, which happens when the time window is too small.
   * In that case jump directly to the window that the row at startPos belongs to.
   */
  const TSKEY firstKey = primaryKeys[startPos];
  bool        emptyWindow = ascending ? (firstKey > pNext->ekey) : (firstKey < pNext->skey);
  if (!emptyWindow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    // for the 'n' / 'y' units the window is re-derived from the timestamp itself
    pNext->skey = taosTimeTruncate(firstKey, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (ascending) {
    pNext->ekey += ((firstKey - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    pNext->skey -= ((pNext->skey - firstKey + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
891
/*
 * Return the key at which processing of pWindow has to stop: the window edge in scan direction
 * (ekey for ascending, skey for descending queries), limited by the end key of the query range.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
908 909
// todo: replace the linear scan with a binary search
/* Return the data buffer of the column identified by colId, or NULL if the block has no such column. */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
923
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
924 925 926
  if (pDataBlock == NULL) {
    return NULL;
  }
927

H
Haojun Liao 已提交
928
  char *dataBlock = NULL;
H
Haojun Liao 已提交
929
  SQuery *pQuery = pRuntimeEnv->pQuery;
930

931
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
932
  if (functionId == TSDB_FUNC_ARITHM) {
933
    sas->pArithExpr = &pQuery->pSelectExpr[col];
934

935 936 937 938
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
939

H
Haojun Liao 已提交
940
    if (sas->data == NULL) {
H
Haojun Liao 已提交
941
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
942 943 944
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

945
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
946
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
947
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
948
      SColumnInfo *pColMsg = &pQuery->colList[i];
949

950 951 952 953 954 955 956 957
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
958

959
      assert(dataBlock != NULL);
960
      sas->data[i] = dataBlock;  // start from the offset
961
    }
962

963
  } else {  // other type of query function
964
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
965
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
966 967 968 969 970
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
971 972
    } else {
      dataBlock = NULL;
973 974
    }
  }
975

976 977 978 979
  return dataBlock;
}

/**
H
Haojun Liao 已提交
980
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
981 982
 * @param pRuntimeEnv
 * @param forwardStep
983
 * @param tsCols
984 985 986 987 988
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
989
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
990 991
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
992
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
993 994
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

995 996
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
997
  if (pDataBlock != NULL) {
998
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
999
    tsCols = (TSKEY *)(pColInfo->pData);
1000
  }
1001

1002
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1003
  if (sasArray == NULL) {
H
Haojun Liao 已提交
1004
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
1005 1006
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1007

1008
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1009
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
1010
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
1011
  }
1012

1013
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1014
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1015
    TSKEY ts = TSKEY_INITIAL_VAL;
1016

H
Haojun Liao 已提交
1017 1018 1019 1020 1021 1022 1023 1024
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1025
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1026 1027
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
1028
      taosTFree(sasArray);
H
hjxilinx 已提交
1029
      return;
1030
    }
1031

H
Haojun Liao 已提交
1032 1033 1034
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1035
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1036
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1037
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1038

1039
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1040
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1041
    }
1042

1043 1044
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1045

1046
    while (1) {
H
Haojun Liao 已提交
1047 1048
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1049 1050 1051
      if (startPos < 0) {
        break;
      }
1052

1053
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1054
      hasTimeWindow = false;
H
Haojun Liao 已提交
1055 1056
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1057 1058
        break;
      }
1059

1060 1061 1062 1063 1064
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1065
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1066

1067 1068
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1069
    }
1070

1071 1072 1073 1074 1075 1076 1077
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1078
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1079
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1080 1081 1082 1083 1084
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1085

1086 1087 1088 1089
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1090

S
Shengliang Guan 已提交
1091
    taosTFree(sasArray[i].data);
1092
  }
1093

S
Shengliang Guan 已提交
1094
  taosTFree(sasArray);
1095 1096 1097 1098 1099 1100
}

/**
 * Select the result row of the group identified by the group-by column value at pData, and make it
 * the current output target of all functions.
 *
 * The row is looked up via doSetTimeWindowFromKey(); a result page is attached to it if it has none.
 * For binary/nchar values a private copy of the key is stored in the row the first time the
 * group is seen; for the integer types the value is stored in win.skey/win.ekey.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        pointer to the group-by column value of the current row
 * @param type         data type of the group-by column
 * @param bytes        width of the group-by column in bytes
 * @return             TSDB_CODE_SUCCESS if the output buffer is ready; -1 if the value is NULL or no
 *                     result row / result page could be obtained (the row is to be skipped)
 *
 * Float/double columns and out-of-memory conditions abort the query through longjmp().
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  const int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
  const bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // for var-length types the lookup key is the payload without the length header
  char* d = pData;
  int16_t len = bytes;
  if (isVarType) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float/binary/nchar columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  // not assign result buffer yet, add new result buffer
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true);
  if (pWindowRes == NULL) {
    return -1;
  }

  if (isVarType) {
    /*
     * This function runs for every row, so the result row of an existing group already owns a copy
     * of the very same key (the row was found by looking up this key). Only allocate the copy once,
     * otherwise the previous buffer is leaked on each row.
     * NOTE(review): relies on key being NULL in a result row that has not been used yet -- confirm
     * against the initialization of SWindowResult.
     */
    if (pWindowRes->key == NULL) {
      pWindowRes->key = malloc(varDataTLen(pData));
      if (pWindowRes->key == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pWindowRes->key, pData);
    }
  } else {
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
      default:                      break;  // any other type keeps the initial value -1
    }

    pWindowRes->win.skey = v;
    pWindowRes->win.ekey = v;
  }

  assert(pRuntimeEnv->windowResInfo.interval == 0);

  // a pageId of -1 marks a result row without an assigned result page
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1155
/*
 * Find the data of the first group-by column that is not a tag and is present in pDataBlock.
 * The type and width of the last examined non-tag group-by column are written to *type and
 * *bytes. Returns the column data, or NULL if no such column is found in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupby = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupby->numOfGroupCols; ++g) {
    SColIndex *pGroupCol = taosArrayGet(pGroupby->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // locate the column in the column list of the query to obtain its schema
    int32_t wantedId = pGroupCol->colId;
    int16_t slot = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pQuery->colList[c].colId != wantedId) {
        continue;
      }

      slot = c;
      break;
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * The column index is acquired from the first table of all qualified tables in this vnode during
     * the query prepare stage, the remaining tables may not have the required column in cache actually.
     * Therefore the column is looked up in the data block by its id instead of by that index.
     */
    int32_t blockCols = (int32_t)taosArrayGetSize(pDataBlock);
    for (int32_t c = 0; c < blockCols; ++c) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, c);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` with the current element of the timestamp buffer.
 * Returns TS_JOIN_TAG_NOT_EQUALS if the tags differ, TS_JOIN_TS_NOT_EQUALS if the row timestamp
 * has not yet reached the buffered timestamp in scan direction, TS_JOIN_TS_EQUAL otherwise.
 * A row timestamp that is already past the buffered one trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // the tag is compared before the timestamp
  int32_t tagDiff = tVariantCompare(&pCtx[0].tag, &elem.tag);
  if (tagDiff != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  // timestamp of the row, read from the input buffer of the first function context
  char *pRowKey = (char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset;
  TSKEY key = *(TSKEY *)pRowKey;

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key != elem.ts) {
    bool notReached = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
    if (notReached) {
      return TS_JOIN_TS_NOT_EQUALS;
    }

    // the row is beyond the buffered timestamp, which is not expected to happen
    assert(false);
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function `functionId` bound to pCtx has to be executed for the current scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SQuery*      pQuery = pRuntimeEnv->pQuery;
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);

  // the timestamp column always generates results
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // completed results and the dummy tag/ts functions are never executed
  if (pResInfo->complete || functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {  // first/first_dst run in ascending queries only
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {  // todo add comments: last/last_dst run only if param[0] equals the query order
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // all remaining functions are skipped during the reverse (supplementary) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1262 1263
/**
 * Apply all output functions to the current data block one row at a time.
 *
 * Every row is first checked against the timestamp buffer (if pRuntimeEnv->pTSBuf is set) and the
 * column filters (if any). A row that passes is applied
 *  - to each time window it belongs to, for an interval query;
 *  - to the result of its group, if the query groups by a normal column;
 *  - to the single query result otherwise.
 * Afterwards the lastKey of the current table is moved one step beyond the last visited row.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pStatis         block statistics, forwarded to setExecParams()
 * @param pDataBlockInfo  information of the current block (rows, window, tid)
 * @param pWindowResInfo  time window results; curIndex is restored after each row
 * @param pDataBlock      column data of the block; dereferenced unconditionally, must not be NULL
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  // the first column serves as timestamp column only if it has the timestamp type
  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
  bool    groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyColumnValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the rows are visited in query order: starting at pQuery->pos and advancing by step
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t j = 0;
  int32_t offset = -1;

  for (j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    // interval window query, decide the time window according to the primary timestamp
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // NOTE(review): tsCols is NULL if the first column is not of timestamp type -- confirm that
      // this cannot happen for an interval query.
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      if (!hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      // a row may belong to several overlapping windows, apply it to each following one as well
      while (1) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyColumnValue) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
        break;
      }
    }
  }

  // offset is still -1 only if the block has no rows at all
  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    item->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

  // todo refactor: extract method
  // release the column pointer arrays allocated by getDataBlock() for arithmetic expressions
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }

    taosTFree(sasArray[i].data);
  }

  // released with taosTFree like every other buffer here, consistent with blockwiseApplyFunctions
  taosTFree(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1425
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1426
  SQuery *pQuery = pRuntimeEnv->pQuery;
1427

H
hjxilinx 已提交
1428 1429
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1430

H
Haojun Liao 已提交
1431
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1432
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1433
  } else {
1434
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1435
  }
1436

1437
  // update the lastkey of current table
1438
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1439
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1440

1441
  // interval query with limit applied
1442
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1443
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1444 1445
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1446
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1447

1448 1449 1450 1451
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1452

1453 1454 1455
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1456

1457 1458 1459
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1460 1461 1462 1463 1464

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1465
    }
1466
  }
1467

1468
  return numOfRes;
1469 1470
}

H
Haojun Liao 已提交
1471
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1472
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
1473

1474 1475
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1476

1477
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1478
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1479
  pCtx->aInputElemBuf = inputData;
1480

1481
  if (tpField != NULL) {
H
Haojun Liao 已提交
1482
    pCtx->preAggVals.isSet  = true;
1483 1484
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1485 1486 1487
  } else {
    pCtx->preAggVals.isSet = false;
  }
1488

H
Haojun Liao 已提交
1489 1490
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1491 1492
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1493

H
Haojun Liao 已提交
1494
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1495 1496
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1497

1498 1499
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1500
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1501
  }
1502

1503 1504 1505 1506 1507
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1508
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1509
    /*
H
Haojun Liao 已提交
1510
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1511 1512 1513 1514 1515 1516 1517 1518 1519 1520
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1521

1522 1523
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1524 1525 1526 1527 1528 1529
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1530 1531
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1532
    pInterpInfo->type = (int8_t)pQuery->fillType;
1533 1534
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1535

1536 1537 1538 1539 1540 1541 1542
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1543
  }
1544

1545 1546 1547 1548 1549 1550
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1551
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1552 1553 1554
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1555
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1556 1557 1558 1559 1560 1561
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1562
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1563 1564
  SQuery* pQuery = pRuntimeEnv->pQuery;

1565
  if (isSelectivityWithTagsQuery(pQuery)) {
1566
    int32_t num = 0;
1567
    int16_t tagLen = 0;
1568

1569
    SQLFunctionCtx *p = NULL;
1570
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1571 1572 1573
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1574

1575
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1576
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1577

1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1591 1592 1593 1594 1595
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1596
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1597
    }
1598
  }
H
Haojun Liao 已提交
1599 1600

  return TSDB_CODE_SUCCESS;
1601 1602
}

H
Haojun Liao 已提交
1603 1604
/*
 * Hand each output column its slice of the shared intermediate-result buffer.
 *
 * The buffer is carved up back to back: column i receives interBytes bytes starting where
 * column i-1 ended, and the slice is registered through setResultInfoBuf().
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char *cursor = buf;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t width = pQuery->pSelectExpr[col].interBytes;

    setResultInfoBuf(&pResultInfo[col], width, isStableQuery, cursor);
    cursor += width;
  }
}

1613
/*
 * Allocate and initialise the per-output function contexts of a query runtime environment.
 *
 * One SQLFunctionCtx is prepared for every output column: input type/width (taken from the
 * tag list, the user-defined constant argument, or the normal column list depending on the
 * column flags), output type/width, scan order, function id and the function parameters.
 * The intermediate-result buffers are laid out directly behind the SResultInfo array in a
 * single allocation.
 *
 * @param pRuntimeEnv  runtime environment to set up; pRuntimeEnv->pQuery must already be set
 * @param order        order value stored in param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when any allocation fails. On
 *         failure resultInfo and pCtx are released, including the parameter variants that
 *         were already created for the contexts.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // SResultInfo array followed by the intermediate buffers of all outputs
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // move the "NULL required" marker from the column flag into the context
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      // user-defined constant column: type and width come from the first argument
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the first output to be the timestamp column
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // byte offset of this output within a result row
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffers start right behind the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameter variants created above before dropping the context array; the
  // contexts come from calloc(), so untouched entries have numOfParams == 0 and are skipped.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by a query runtime environment.
 *
 * Does nothing when no query is attached. Otherwise the time-window info, the function
 * contexts (parameter variants, tag variant, tag-context list), the result-info block, the
 * fill info, the result buffer, both query handles and the timestamp buffer are released.
 * resultInfo is only freed together with pCtx, i.e. when pCtx is not NULL.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  qDebug("QInfo:%p teardown runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *ctx = &pRuntimeEnv->pCtx[col];

      for (int32_t k = 0; k < ctx->numOfParams; ++k) {
        tVariantDestroy(&ctx->param[k]);
      }

      tVariantDestroy(&ctx->tag);
      taosTFree(ctx->tagInfo.pTagCtxList);
    }

    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1760
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1761

H
Haojun Liao 已提交
1762
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;}
H
hjxilinx 已提交
1763

H
Haojun Liao 已提交
1764 1765 1766
/*
 * Decide whether the query is treated as a "fixed output" query.
 *
 * Interval queries never are; top/bottom and group-by-normal-column queries always are.
 * For the rest, the query is fixed-output as soon as one relevant output function is not a
 * multi-output function. The leading ts_comp projection and the TS / TS_DUMMY outputs are
 * not taken into account.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note: top/bottom query is a fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // the ts_comp function: a one-parameter projection of the primary timestamp in slot 0
    bool isTsComp = (idx == 0) && pFunc->functionId == TSDB_FUNC_PRJ && pFunc->numOfParams == 1 &&
                    pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX;
    bool isTs = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTs) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1796
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1797
static bool isPointInterpoQuery(SQuery *pQuery) {
1798
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1799
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1800
    if (functionID == TSDB_FUNC_INTERP) {
1801 1802 1803
      return true;
    }
  }
1804

1805 1806 1807 1808
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1809
static bool isSumAvgRateQuery(SQuery *pQuery) {
1810
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1811
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1812 1813 1814
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1815

1816 1817 1818 1819 1820
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1821

1822 1823 1824
  return false;
}

H
hjxilinx 已提交
1825
// Returns true when at least one output column uses TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool hasLastRow = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput && !hasLastRow; ++idx) {
    hasLastRow = (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return hasLastRow;
}

H
hjxilinx 已提交
1836
/*
 * Decide whether a second scan in the opposite direction is required.
 *
 * TS, TS_DUMMY and TAG outputs are ignored. A first/first_dst output demands a reverse scan
 * when the query is not ascending. The first last/last_dst output encountered settles the
 * answer immediately: a reverse scan is needed iff the order value stored in that function's
 * first argument differs from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      fid = pFunc->functionId;

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t funcOrder = (int32_t)pFunc->arg[0].argValue.i64;
      return funcOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1855

H
Haojun Liao 已提交
1856 1857 1858 1859
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1860 1861
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1862 1863 1864
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1865 1866 1867 1868

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1869
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1870 1871 1872
      return false;
    }
  }
1873

H
hjxilinx 已提交
1874 1875 1876
  return true;
}

1877 1878
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1879
/*
 * Compute the interval-aligned time window that contains `key`.
 *
 * win->skey is `key` truncated by taosTimeTruncate() according to the query interval and
 * precision. win->ekey is the last timestamp of that interval:
 *   - INT64_MAX when keyFirst is so large that adding one interval would overflow (the whole
 *     query range is then shorter than one interval, so no adjustment is required);
 *   - computed with taosTimeAdd() for the 'n' and 'y' interval units;
 *   - skey + interval - 1 otherwise.
 *
 * `key` must lie within [keyFirst, keyLast] and sliding must not exceed the interval.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);

  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom and group-by-normal-column queries. Otherwise it is 1 only if
 * every output function other than TS / TS_DUMMY is a multi-output function (and at least
 * one such function exists).
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!allMultiOutput) {
      break;  // one single-output function is enough to clear the flag
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1923
bool colIdCheck(SQuery *pQuery) {
1924 1925
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1926
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1927
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1928 1929 1930
      return false;
    }
  }
1931

1932 1933 1934 1935 1936 1937
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
//
// Returns true when every output function, apart from the TS / TS_DUMMY / TAG / TAG_DUMMY
// helper columns, is either functId or functIdDst.
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool isHelperCol = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                       (fid == TSDB_FUNC_TAG_DUMMY);
    bool isExpected = (fid == functId) || (fid == functIdDst);

    if (!isHelperCol && !isExpected) {
      return false;
    }
  }

  return true;
}

// True when all non-helper outputs are TSDB_FUNC_FIRST or TSDB_FUNC_FIRST_DST.
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// True when all non-helper outputs are TSDB_FUNC_LAST or TSDB_FUNC_LAST_DST.
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1958
// todo refactor, add iterator
1959 1960
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1961
  for(int32_t i = 0; i < t; ++i) {
1962
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1963 1964 1965

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1966
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1967

1968 1969 1970 1971
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1972 1973 1974 1975
    }
  }
}

1976
/*
 * Normalise the scan order (and, when needed, the query window) for query types whose
 * result does not depend on the order requested by the client.
 *
 *   - last_row query:                 always ascending; window ends swapped if skey > ekey.
 *   - group by normal column, DESC:   switched to ascending; the per-table last keys are
 *                                     exchanged as well.
 *   - point interpolation, no interval: always ascending.
 *   - only first()/first_dst():       ascending  (non-interval query, or interval query on a
 *   - only last()/last_dst():         descending  super table).
 *
 * Whenever the direction flips, skey/ekey of the query window are swapped so that the window
 * still runs in scan direction.
 *
 * @param pQInfo       query info; also used as the identity printed in the log lines
 * @param pQueryMsg    not used by this function
 * @param stableQuery  true for a super table query
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // shared log format for every branch that exchanges the query range
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    // pQInfo is logged directly: GET_QINFO_ADDR() is only valid for a runtime-env pointer
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Initial number of result pages for a query:
 *   - 128 for a group-by-normal-column query;
 *   - max(number of tables, 16) for an interval query (one page per table);
 *   - 1 otherwise (super table query, one page for each subset).
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t pages;
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  } else {
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(pages > 0);
  return pages;
}

2087 2088 2089
/*
 * Work out the page size and row size of the intermediate result buffer.
 *
 * @param pRuntimeEnv  runtime environment; numOfRowsPerPage is updated as a side effect
 * @param ps           out: page size in bytes - DEFAULT_INTERN_BUF_PAGE_SIZE doubled until a
 *                     page (minus the tFilePage header) holds at least two rows
 * @param rowsize      out: bytes per row, i.e. pQuery->rowSize scaled by
 *                     GET_ROW_PARAM_FOR_MULTIOUTPUT()
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t rowBytes = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));
  int32_t headerBytes = sizeof(tFilePage);

  // one page contains at least two rows
  int32_t pageBytes = DEFAULT_INTERN_BUF_PAGE_SIZE;
  while ((rowBytes * 2) > pageBytes - headerBytes) {
    pageBytes = (pageBytes << 1u);
  }

  *rowsize = rowBytes;
  *ps = pageBytes;

  pRuntimeEnv->numOfRowsPerPage = (pageBytes - sizeof(tFilePage)) / rowBytes;
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2103
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2104

H
Haojun Liao 已提交
2105 2106 2107 2108
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
2109 2110 2111 2112 2113
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
2114

H
Haojun Liao 已提交
2115 2116 2117 2118 2119 2120 2121 2122
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

H
Haojun Liao 已提交
2123
    // no statistics data
H
Haojun Liao 已提交
2124
    if (index == -1) {
H
Haojun Liao 已提交
2125
      return true;
2126
    }
2127

2128
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
2129
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
H
Haojun Liao 已提交
2130
      return true;
2131
    }
2132

2133
    // all points in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
2134
    if (pDataStatis[index].numOfNull == numOfRows) {
2135 2136
      continue;
    }
2137

H
Haojun Liao 已提交
2138 2139 2140
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
S
TD-1057  
Shengliang Guan 已提交
2141 2142
      float minval = (float)(*(double *)(&pDataBlockst->min));
      float maxval = (float)(*(double *)(&pDataBlockst->max));
2143

2144 2145 2146 2147 2148 2149 2150
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
2151
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
2152 2153 2154 2155 2156
          return true;
        }
      }
    }
  }
2157

H
Haojun Liao 已提交
2158 2159 2160 2161 2162 2163 2164 2165
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
2166

H
Haojun Liao 已提交
2167
  return false;
2168 2169
}

H
Haojun Liao 已提交
2170 2171 2172 2173 2174 2175 2176 2177
/*
 * Tell whether a data block spans more than one interval window of the query.
 *
 * Starting from the aligned window that contains the block's first row in scan direction,
 * the windows are advanced with GET_NEXT_TIMEWINDOW() until they leave the block. true is
 * returned as soon as a window boundary is found inside the block's [skey, ekey] range.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY rangeStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY rangeEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, rangeStart, rangeEnd, &w);
    assert(w.ekey >= pBlockInfo->window.skey);

    // the first window already ends inside the block
    if (w.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.skey > pBlockInfo->window.ekey) {
        return false;  // window moved past the block
      }

      assert(w.ekey > pBlockInfo->window.ekey);
      if (w.skey <= pBlockInfo->window.ekey && w.skey > pBlockInfo->window.skey) {
        return true;
      }
    }
  }

  // descending scan: mirror image of the above, anchored at the block's last row
  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, rangeStart, rangeEnd, &w);
  assert(w.skey <= pBlockInfo->window.ekey);

  if (w.skey > pBlockInfo->window.skey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &w);
    if (w.ekey < pBlockInfo->window.skey) {
      return false;
    }

    assert(w.skey < pBlockInfo->window.skey);
    if (w.ekey < pBlockInfo->window.ekey && w.ekey >= pBlockInfo->window.skey) {
      return true;
    }
  }
}

2219
/*
 * Load as little of the current data block as the query needs.
 *
 * The required amount is BLK_DATA_ALL_NEEDED when the query has filter columns or a
 * timestamp buffer, or when it is an interval query and the block overlaps several time
 * windows; otherwise it is the OR of what each output function's dataReqFunc() asks for.
 *
 * @param pRuntimeEnv   runtime environment of the query (summary counters are updated)
 * @param pQueryHandle  tsdb query handle positioned on the block
 * @param pBlockInfo    description of the block (key range, row count)
 * @param pStatis       out: block statistics, when they were retrieved
 * @param pDataBlock    out: column data, when the block was loaded
 * @param status        out: BLK_DATA_NO_NEEDED, BLK_DATA_STATIS_NEEDED, BLK_DATA_ALL_NEEDED,
 *                      or BLK_DATA_DISCARD when the statistics rule the block out
 * @return TSDB_CODE_SUCCESS, or terrno when the data block could not be retrieved
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = 0;
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // ask every output function what it needs; stop once one of them wants all data
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;

        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // same failure handling as the BLK_DATA_ALL_NEEDED path below
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    // NOTE(review): the block is still retrieved and counted as loaded when it was just
    // marked BLK_DATA_DISCARD - confirm whether callers rely on *pDataBlock in that case.
    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2289
/*
 * Binary search in an ascending array of timestamps.
 *
 * @param pValue  array of `num` TSKEY values, passed as raw bytes
 * @param num     number of entries; a value <= 0 yields -1
 * @param key     timestamp to look for
 * @param order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return for TSDB_ORDER_DESC the position of an entry that is <= key (the last entry when
 *         key is not below it), or -1 when every entry is greater than key;
 *         for TSDB_ORDER_ASC the position of an entry that is >= key (the first entry when
 *         key is not above it), or -1 when every entry is smaller than key.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t low = 0;
  int32_t high = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    for (;;) {
      // boundary checks first; afterwards keys[low] < key < keys[high] holds
      if (key >= keys[high]) {
        return high;
      }
      if (key == keys[low]) {
        return low;
      }
      if (key < keys[low]) {
        return low - 1;
      }

      mid = low + ((high - low + 1) >> 1);

      if (key < keys[mid]) {
        high = mid - 1;
      } else if (key > keys[mid]) {
        low = mid + 1;
      } else {
        return mid;
      }
    }
  }

  for (;;) {
    if (key <= keys[low]) {
      return low;
    }
    if (key == keys[high]) {
      return high;
    }
    if (key > keys[high]) {
      // the next slot is the answer unless it lies beyond the end of the array
      int32_t next = high + 1;
      return (next >= num) ? -1 : next;
    }

    mid = low + ((high - low + 1) >> 1);

    if (key < keys[mid]) {
      high = mid - 1;
    } else if (key > keys[mid]) {
      low = mid + 1;
    } else {
      return mid;
    }
  }
}

2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364
/*
 * Resize every output column buffer so that it can hold `capacity` rows and
 * point each function context at the start of its (possibly moved) buffer.
 * Nothing is done when `capacity` is smaller than the current capacity.
 * On allocation failure the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;
    assert(bytes > 0 && capacity > 0);

    // payload for `capacity` rows plus the tFilePage header
    char *pNew = realloc(pQuery->sdata[col], bytes * capacity + sizeof(tFilePage));
    if (pNew == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[col] = (tFilePage *)pNew;

    // the buffer may have moved: refresh the output position of the context
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2380 2381 2382
/*
 * For queries whose output grows with the input (the original comment names
 * prj/diff queries), make sure the output buffers can take all rows of the
 * block described by pBlockInfo in addition to the rows already produced.
 *
 * Interval queries, group-by-normal-column queries, fixed-output queries and
 * ts-comp queries are left untouched. When growth is needed, each column
 * buffer is realloc()ed, the area after the existing rows is zeroed and the
 * function contexts are re-pointed to the first free row.
 * On allocation failure the function longjmp()s with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough free rows already
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;
    assert(bytes > 0 && newSize > 0);

    char *pNew = realloc(pQuery->sdata[col], bytes * newSize + sizeof(tFilePage));
    if (pNew == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that are already in the buffer
    memset(pNew + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[col] = (tFilePage *)pNew;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data + pRec->rows * bytes;

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      // these functions also write into the output of the first column
      pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439
/*
 * Initialise the first time window of an interval query from the first data
 * block. Does nothing unless the query is an interval query whose
 * windowResInfo.prevSKey still holds TSKEY_INITIAL_VAL.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
    pInfo->startTime = win.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);
    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = win.skey;
}

2440 2441
/*
 * Iterate over the data blocks of the current scan (master scan uses
 * pQueryHandle, any other scan uses pSecQueryHandle) and apply the query
 * functions to each block.
 *
 * The loop stops when there are no more blocks, when a block fails to load,
 * or when the query status reports a full result buffer / completion.
 * A killed query or a non-zero terrno aborts through longjmp().
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT handle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    if (loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis, &pDataBlock, &status) != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped entirely: move lastKey just past it in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2520
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2521
  tVariantDestroy(tag);
2522

2523
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2524
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2525
    assert(val != NULL);
2526

H
[td-90]  
Haojun Liao 已提交
2527
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2528
  } else {
2529
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2530 2531 2532 2533
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2534

H
hjxilinx 已提交
2535
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2536
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2537 2538 2539 2540
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2541
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2542
    } else {
H
Haojun Liao 已提交
2543 2544 2545 2546 2547
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2548
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2549
    }
2550
  }
2551 2552
}

2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564
/*
 * Linear lookup of a tag column by column id.
 * Returns a pointer into pTagColList, or NULL when no entry has that id.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo *pCol = pTagColList;
  for (int32_t remain = numOfTags; remain > 0; --remain, ++pCol) {
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2565
/*
 * Load the tag values of pTable into the function contexts of the query.
 *
 * - A query that consists of a single TSDB_FUNC_TS_COMP expression only
 *   needs the tag named by that expression's argument, stored in pCtx[0].
 * - Otherwise every output expression that refers to a tag column gets its
 *   tag value, and, when a ts buffer exists and the first expression is a
 *   ts/prj function on the primary timestamp column, the join tag named by
 *   its argument is stored in pCtx[0] as well.
 *
 * NOTE(review): doGetTagColumnInfoById() can return NULL, and the result is
 * dereferenced here without a check - confirm the id is always present.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag, pExpr->type,
                           pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;
  if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
      pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    assert(pFuncMsg->numOfParams == 1);

    SQInfo*      pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pFirstExpr->base.arg->argValue.i64,
        pRuntimeEnv->pCtx[0].tag.i64Key);
  }
}

/*
 * Feed one window result into the merge functions of all output columns.
 *
 * mergeFlag == false starts a new output row: every context advances its
 * output position by one row, is switched to FIRST_STAGE_MERGE and is
 * re-initialised. mergeFlag == true merges into the current output row.
 * The first pass prepares all contexts (input position, tag value); the
 * second pass runs distMergeFunc for every column except TSDB_FUNC_TAG_DUMMY.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtxList[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf += pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[funcId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes, pPage);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pc->tag);

    int32_t type = pc->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtxList[col]);
    }
  }
}

2652
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2721
/*
 * Debug helper: dump `numOfRows` rows of the column-wise intermediate result
 * in pdata to stdout, one line per row. Columns whose output type has no
 * matching case are skipped.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY:
          printBinaryData(pExpr->base.functionId, pdata[col]->data + pExpr->bytes * row, pExpr->type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)(pdata[col]->data + pExpr->bytes * row));
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2756 2757 2758
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2759 2760 2761 2762 2763
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2764

2765 2766
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2767

2768 2769
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2770

2771 2772 2773 2774
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2775

2776 2777 2778 2779
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2780

H
hjxilinx 已提交
2781
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2782
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2783
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2784

H
Haojun Liao 已提交
2785
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2786
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2787

H
hjxilinx 已提交
2788
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2789
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2790
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2791

H
Haojun Liao 已提交
2792
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2793
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2794

2795 2796 2797
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2798

2799 2800 2801
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2802
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group
 * produces at least one result page or all groups have been processed.
 *
 * Marks the super-table query as over when the last group has been handled
 * and the group result position equals the number of data pages.
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo* pGroupRes = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pGroupRes->pos.pageId == pGroupRes->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the disk-based result buffer into the
 * query output buffers (pQuery->sdata), at most pQuery->rec.capacity rows.
 *
 * When the current group has been delivered completely, the next group is
 * merged first; if no group is left the super-table query is marked as over.
 * The position of the next row to deliver is kept in
 * pQInfo->groupResInfo.pos so that a page that does not fit into the
 * output buffer is continued by the next call.
 *
 * pQuery->rec.rows must be 0 on entry and receives the number of copied rows.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  for (int32_t j = pGroupResInfo->pos.pageId; j < (int32_t)size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->pos.rowId < pData->num);

    // first row of this page that has not been delivered yet (> 0 when the
    // previous call stopped in the middle of the page)
    int32_t startRow = (int32_t)pGroupResInfo->pos.rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    int32_t numOfCopiedRows = 0;
    bool    done = false;

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer is full before the page is exhausted: remember where to resume
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->pos.rowId += numOfCopiedRows;
      done = true;
    } else {
      // only the rows that are still pending, not the whole page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pos.pageId += 1;
      pGroupResInfo->pos.rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page; skip the delivered rows
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + (size_t)startRow * bytes;
      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2910
/*
 * Number of result rows held by a window result: the numOfRes of the first
 * output column that is not a ts/tag/tagprj function and has numOfRes > 0.
 * Returns 0 when no such column exists.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool isAuxiliary =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);

    if (!isAuxiliary) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }
  }

  return 0;
}

2933
/*
 * Merge the window results of all tables in pGroup into time order and
 * write them, page by page, into the group result of pQInfo.
 *
 * - No table of the group has results: returns 0.
 * - Exactly one table has results: nothing is merged; the group result is
 *   pointed at that table's own pages.
 * - Otherwise the tables are merged through a loser tree keyed on the window
 *   start timestamp; windows with the same timestamp are combined with doMerge().
 *
 * Returns the number of data pages of the group result, or -1 when the
 * merged rows could not be flushed. Out-of-memory and a killed query abort
 * through longjmp(pRuntimeEnv->env, ...); all memory allocated here is
 * released before any return or longjmp.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    taosTFree(posList);
    taosTFree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;

      // remember the pages of a table that really has results; the
      // single-table shortcut below must not pick up an empty table
      pageList = list;
      tid = TSDB_TABLEID(item->pTable)->tid;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    taosTFree(posList);
    taosTFree(pTableList);
    return 0;
  }

  if (numOfTables == 1) {  // no need to merge results since only one table in each group
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {  // the tree is dereferenced below, so it must exist
    taosTFree(pTableList);
    taosTFree(posList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pResultInfo == NULL) {
    taosTFree(pTableList);
    taosTFree(posList);
    taosTFree(pTree);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  char* buf = calloc(1, pRuntimeEnv->interBufSize);
  if (buf == NULL) {
    taosTFree(pTableList);
    taosTFree(posList);
    taosTFree(pTree);
    taosTFree(pResultInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();
  bool    flushFailed = false;

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pTree);
      taosTFree(pResultInfo);
      taosTFree(buf);

      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the winner of the loser tree is the source with the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window: just advance this source
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            flushFailed = true;
            break;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (!flushFailed && buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      flushFailed = true;
    }
  }

  if (!flushFailed) {
    int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

    qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);
  }

  // single release point for the success path and for both flush failures
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);

  taosTFree(pResultInfo);
  taosTFree(buf);

  return flushFailed ? -1 : pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3118 3119
/**
 * Copy the rows currently held in the output buffers (pQuery->sdata) into newly
 * requested pages of the result buffer attached to the runtime environment.
 *
 * The rows are moved in chunks of at most pResultBuf->numOfRowsPerPage rows. One new
 * page is requested per chunk, and pGroupResInfo->numOfDataPages is increased by one
 * for every page that has been filled.
 *
 * @param pRuntimeEnv    runtime environment (query, per-column contexts/offsets, result buffer)
 * @param pGroupResInfo  group result info; its groupId is passed to getNewDataBuf and its
 *                       numOfDataPages counter is updated
 * @return TSDB_CODE_SUCCESS when all rows are copied, TSDB_CODE_QRY_OUT_OF_MEMORY when a
 *         new page cannot be obtained (pages filled before the failure stay counted)
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // filled in by getNewDataBuf for every page that is requested
  int32_t pageId = -1;
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  int32_t remain = (int32_t) pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t rows = (remain > capacity)? capacity:remain;
    assert(rows > 0);

    // get the output buffer page
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    if (buf == NULL) {
      // never dereference a page that could not be obtained; callers treat any
      // value other than TSDB_CODE_SUCCESS as a failed flush
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = rows;

    // pagewisely copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      char* output = buf->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage;
      // compute the byte offset in size_t so that offset * bytes cannot overflow int32_t
      char* src = ((char *) pQuery->sdata[i]->data) + (size_t) offset * bytes;
      memcpy(output, src, (size_t)(buf->num * bytes));
    }

    offset += rows;
    remain -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/*
 * Reset the output buffers and the function contexts before the next round of merging.
 *
 * For every output column the row counter of the output buffer is cleared, and the
 * matching context is re-bound to pResultInfo[k] with size 1 and startOffset 0. The
 * output pointer is placed outputBytes bytes in front of the buffer start.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  int32_t numOfOutput = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pCtx[col];

    pQuery->sdata[col]->num = 0;

    pColCtx->resultInfo  = &pResultInfo[col];
    pColCtx->startOffset = 0;
    pColCtx->size        = 1;
    pColCtx->aOutputBuf  = pQuery->sdata[col]->data - pColCtx->outputBytes;
  }
}

3167 3168 3169 3170
/*
 * Turn the query range of one table around so that it can be scanned in the opposite direction.
 *
 * If the table produced results (lastKey differs from win.skey), the end of the window is first
 * moved to lastKey + step, where step is derived from the current pQuery->order.order. After that
 * skey/ekey are swapped, lastKey restarts from the new skey, the cursor order is switched and its
 * vgroupIndex is reset, and the window result index is placed on the last window result.
 * A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // TODO validate the (currently disabled) assertion: for a descending query win.ekey >= lastKey + step
  // is expected, for an ascending query win.ekey <= lastKey + step.

  // lastKey == win.skey means no results, the window is left untouched in that case
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    // order has changed already
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  pTableQueryInfo->cur.vgroupIndex = -1;
  SWITCH_ORDER(pTableQueryInfo->cur.order);

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * Update the "complete" flag of every output column in all closed window results.
 *
 * FIRST/FIRST_DST under ascending order and LAST/LAST_DST under descending order are marked
 * as not complete. Every other function except TS and TAG is marked as complete. TS and TAG
 * columns are left unchanged, and window results that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (getTimeWindowResStatus(pWindowResInfo, w)) {
      SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t functId = pQuery->pSelectExpr[col].base.functionId;

        bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
        bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pWinRes->resultInfo[col].complete = false;
        } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
          pWinRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

3223 3224
/*
 * Decide which output functions still have to run in the coming scan by updating the
 * "complete" flag of their result info.
 *
 * For group-by-normal-column and interval queries the flags of all closed window results are
 * updated through disableFuncInReverseScanImpl. Otherwise the result info bound to each function
 * context is updated directly; contexts without a result info are skipped.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // for simple result of table query, todo refactor
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultInfo *pResInfo = pRuntimeEnv->pCtx[col].resultInfo;
    if (pResInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    int32_t functId = pQuery->pSelectExpr[col].base.functionId;

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pResInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pResInfo->complete = true;
    }
  }
}

/*
 * Reverse the query range of every table in every table group, and copy the resulting lastKey
 * into the matching entry of pQInfo->tableGroupInfo.pGroupList.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t groupCount = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pQueryInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyInfoList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pQueryInfoList);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pQueryInfoList, k);
      updateTableQueryInfoForReverseScan(pQuery, pTableQueryInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyInfoList, k);
      pKeyInfo->lastKey = pTableQueryInfo->lastKey;

      // both lists are expected to describe the same table at the same position
      assert(pTableQueryInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3274
/* Apply SWITCH_ORDER to the order field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
  }
}

H
Haojun Liao 已提交
3281
/*
 * Allocate the result info array of a window result together with its intermediate buffer.
 *
 * A single zero-initialized allocation holds numOfOutput SResultInfo entries followed by
 * interBufSize bytes; the trailing bytes are passed to setWindowResultInfo as the intermediate
 * buffer. The position of the row is reset to {-1, -1}.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if the allocation fails.
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result output buffer starts right after the result info entries
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every function context back to the beginning of its output buffer, clear the buffer,
 * reset and re-bind the per-column result info of the runtime environment, and finally run
 * initCtxOutputBuf.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    pColCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(&pRuntimeEnv->resultInfo[col]);
    pColCtx->resultInfo = &pRuntimeEnv->resultInfo[col];

    // top/bottom/diff write their timestamps into the output buffer of the first column
    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of the function contexts by `output` rows and reset their
 * result info. Must not be used when a DIFF function is part of the query (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pColCtx->aOutputBuf += pColCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pColCtx->ptsOutputBuf = (char*)pColCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pColCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and invoke the init routine of the
 * aggregate function for those contexts whose result info is not initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pColCtx->currentStage = 0;

    SResultInfo* pResInfo = GET_RES_INFO(pColCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pColCtx);
    }
  }
}

3369
/*
 * Apply the remaining OFFSET of the query to the rows in the current output buffers.
 *
 * - Nothing happens if there are no rows or the offset is already 0.
 * - If all rows fall inside the offset, the offset is reduced by the number of rows, the rows are
 *   dropped, the output contexts are reset and the QUERY_RESBUF_FULL flag is cleared.
 * - Otherwise the first limit.offset rows of every output column are removed by moving the
 *   remaining rows to the front of the buffer, the output pointers are placed behind the remaining
 *   rows and the number of results is updated.
 *
 * The row counters and the offset are handled as signed 64-bit values here (they are assigned
 * to/compared with int64_t), hence they are logged with PRId64.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
  } else {
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, pQuery->rec.rows);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      // source and destination overlap, memmove is required
      memmove(pQuery->sdata[i]->data, (char*)pQuery->sdata[i]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
      pRuntimeEnv->pCtx[i].aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

      // diff/top/bottom share the (already adjusted) output position of the first column for timestamps
      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
  }
}

/*
 * Update the status of the query.
 * QUERY_NOT_COMPLETED replaces the whole status; any other status is OR-ed into the current one
 * after QUERY_NOT_COMPLETED has been cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Invoke xNextStep for every output function except TSDB_FUNC_TS on the result info currently
 * bound to the contexts. Returns true if at least one of them is not complete afterwards.
 * All functions are always visited, there is no early exit.
 */
static bool hasIncompleteOutputAfterNextStep(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    incomplete = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t functId = pQuery->pSelectExpr[col].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    aAggs[functId].xNextStep(pColCtx);

    SResultInfo *pResInfo = GET_RES_INFO(pColCtx);
    incomplete |= (!pResInfo->complete);
  }

  return incomplete;
}

/*
 * Move every output function to its next step and report whether the data blocks have to be
 * scanned once more, i.e. whether any function is still not complete.
 *
 * For group-by-normal-column and interval queries this is done for each closed window result
 * (the output buffers are bound to the window result first); otherwise it is done once on the
 * current contexts.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return hasIncompleteOutputAfterNextStep(pRuntimeEnv);
  }

  bool toContinue = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);
    if (!pWinRes->closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    // do not short-circuit: the next-step routine has to run for every closed window
    if (hasIncompleteOutputAfterNextStep(pRuntimeEnv)) {
      toContinue = true;
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3465
/*
 * Take a snapshot of the current query state: status, current window result index, the query
 * window and the window of the current table. In the snapshot, lastKey and the start of the
 * table window are set to `start`.
 *
 * `start` must not be beyond lastKey of the current table in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pCurrent->lastKey);
  } else {
    assert(start >= pCurrent->lastKey);
  }

  SQueryStatusInfo snapshot = {
      .lastKey     = start,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .status      = pQuery->status,
  };

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pCurrent->win);

  snapshot.curWindow.skey = start;

  return snapshot;
}

3486 3487 3488 3489
/*
 * Prepare the runtime environment for the reverse scan.
 *
 * The ts buffer cursor is saved into pStatus, the query window becomes the swapped
 * pStatus->curWindow, the query order and the order of the contexts are switched, the scan flag
 * is set to reverse scan, the per-table ranges are reversed, and a new secondary query handle is
 * created for the reversed range (an existing one is cleaned up first).
 * If the handle cannot be created, control leaves through longjmp with terrno.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    bool hasNext = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(hasNext);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // the swapped window has to be consistent with the switched order
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond reverseCond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(reverseCond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3535 3536
/*
 * Undo the changes made for the reverse scan: switch the query order and the order of the
 * contexts back, restore the ts buffer cursor, set the master scan flag, and restore lastKey,
 * status and the time windows from the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // bring back the state that was saved before the reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3557 3558 3559 3560 3561 3562 3563
/*
 * Set lastKey of the first table in the first group to the start of the time window of pCond.
 * Only valid when the group info holds exactly one table (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3564
/*
 * Scan the data blocks of the current table, starting from `start`.
 *
 * 1. Master scan, followed by repeated scans over the same range for as long as
 *    needScanDataBlocksAgain reports that some function is not complete. A new secondary query
 *    handle is created for every repeated scan.
 * 2. If the query needs it, one scan in reverse order; the environment is restored afterwards.
 *
 * Control leaves through longjmp if a query handle cannot be created (terrno) or if the query
 * has been killed (TSDB_CODE_TSC_QUERY_CANCELLED).
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (saved.lastKey != pCurrent->lastKey) {
        saved.curWindow.ekey = pCurrent->lastKey - step;
      }

      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond repeatCond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(repeatCond.twindow, saved.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &repeatCond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &repeatCond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    // the repeated scan starts again from the window result of the master scan
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        repeatCond.twindow.skey, repeatCond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3645
/*
 * Call the finalize routine of every output function.
 *
 * For group-by-normal-column and interval queries this is done for each closed window result,
 * and the number of rows of that window result is recorded afterwards. For group-by-normal-column
 * queries all time windows are closed beforehand. For other queries the finalize routines are
 * called once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pWinRes = &pWindowResInfo->pResult[w];
    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWinRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/* Return true if at least one output function is something other than TS, TAG or TAGPRJ. */
static bool hasMainOutput(SQuery *pQuery) {
  int32_t numOfOutput = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3693
/*
 * Initialize a table query info inside the caller supplied memory `buf`.
 *
 * The time window is set to `win`, lastKey starts at win.skey and the cursor's vgroupIndex is
 * set to -1. For interval and group-by-normal-column queries the window result info is
 * initialized as well; other queries leave it untouched.
 *
 * Returns `buf` as STableQueryInfo, or NULL if the window result info cannot be initialized.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STableQueryInfo *pInfo = buf;

  pInfo->pTable  = pTable;
  pInfo->win     = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // in other aggregate query, do not initialize the windowResInfo
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol) {
    return pInfo;
  }

  // set more initial size of interval/groupby query
  int32_t initialSize = 16;
  int32_t initialThreshold = 100;

  int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  return (code == TSDB_CODE_SUCCESS) ? pInfo : NULL;
}

H
Haojun Liao 已提交
3718
/* Clean up the window result info of a table query info. A NULL argument is ignored. */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3729
 * @param pDataBlockInfo
3730
 */
H
Haojun Liao 已提交
3731
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3732
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3733 3734 3735
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3736 3737
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3738 3739 3740 3741

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3742

H
Haojun Liao 已提交
3743 3744 3745
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3746

3747 3748
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3749 3750 3751
  if (pWindowRes == NULL) {
    return;
  }
3752

3753 3754 3755 3756 3757
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3758
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3759 3760 3761 3762
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3763

H
Haojun Liao 已提交
3764 3765
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3766 3767 3768 3769
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3770
/*
 * Bind the function contexts to the given window result: the output pointer of every column is
 * set to its position inside the result page of the window result, the result info is taken from
 * the window result, and the super table query flag of the result info is updated.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    // top/bottom/diff write their timestamps into the output position of the first column
    bool needTsOutput =
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pColCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pColCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3797 3798
/*
 * Bind the function contexts to the given window result and initialize the contexts whose
 * result info is not initialized yet.
 *
 * The result info of every context is always switched to the one of the window result. Columns
 * whose result info is both initialized and complete get nothing else updated.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];

    pColCtx->resultInfo = &pResult->resultInfo[col];

    bool finished = pColCtx->resultInfo->initialized && pColCtx->resultInfo->complete;
    if (!finished) {
      pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
      pColCtx->currentStage = 0;

      // top/bottom/diff write their timestamps into the output position of the first column
      int32_t functionId = pColCtx->functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
        pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }

      /*
       * set the output buffer information and intermediate buffer
       * not all queries require the interResultBuf, such as COUNT
       */
      pColCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;     // set super table query flag

      if (!pColCtx->resultInfo->initialized) {
        aAggs[functionId].init(pColCtx);
      }
    }
  }
}

3831
/*
 * Set the tag values for the given table and, if a ts buffer exists, position the ts buffer for
 * this table.
 *
 * On the first visit of a table (cursor vgroupIndex == -1) the tag of the first context is copied
 * into the table query info, the start position for that tag is looked up in the ts buffer, and
 * the resulting cursor is stored in the table query info. On later visits the stored cursor is
 * applied to the ts buffer. Always returns 0.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  tVariantAssign(&pTableQueryInfo->tag, &pRuntimeEnv->pCtx[0].tag);
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pTableQueryInfo->tag);

  // keep the cursor info of current meter
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3861
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3862 3863
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3864
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3865

3866 3867 3868
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3869
    pTableQueryInfo->win.skey = key;
3870
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3871

3872 3873 3874 3875 3876
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3877

3878 3879 3880 3881 3882 3883
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3884
    STimeWindow     w = TSWINDOW_INITIALIZER;
3885
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3886

H
Haojun Liao 已提交
3887 3888
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3889
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3890
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3891

3892 3893
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3894
        assert(win.ekey == pQuery->window.ekey);
3895
      }
3896

3897
      pWindowResInfo->prevSKey = w.skey;
3898
    }
3899

3900
    pTableQueryInfo->queryRangeSet = 1;
3901
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3902 3903 3904 3905
  }
}

/*
 * Tell whether any output expression of the query uses a function whose status flags in aAggs
 * include TSDB_FUNCSTATE_NEED_TS.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true if at least one function carries the NEED_TS flag, false otherwise
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    idx += 1;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is required when
 * 1. the last key of the current table or the end key of the query window falls inside the time
 *    range of the block (the exact row position must then be located), or
 * 2. at least one queried function needs timestamps (see requireTimestamp()).
 *
 * @param pQuery          query being executed; pQuery->current supplies the last key
 * @param pDataBlockInfo  block whose time window is tested
 * @return true if the timestamp column must be loaded
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3929
/*
 * Copy rows from the closed window results of pResultInfo into the output columns pQuery->sdata.
 *
 * Copying resumes at window pQInfo->groupIndex (counted from the front for ascending order, from
 * the back otherwise) and at row pQInfo->groupResInfo.pos.rowId inside that window. It stops when
 * the windows are exhausted or pQuery->rec.capacity rows have been produced; groupIndex and rowId
 * are updated so that a later call continues where this one stopped.
 *
 * @param pQInfo       query handle
 * @param pResultInfo  window results to copy from
 * @param orderType    TSDB_ORDER_ASC for forward traversal, anything else for backward traversal
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  int32_t step;
  int32_t idx;
  if (orderType == TSDB_ORDER_ASC) {
    step = 1;
    idx = pQInfo->groupIndex;
  } else {  // descending: mirror the resume index and walk backwards
    step = -1;
    idx = numOfClosed - pQInfo->groupIndex - 1;
  }

  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  int32_t        copied = 0;

  for (; idx >= 0 && idx < numOfClosed; idx += step) {
    SWindowResult *pWin = &pWindows[idx];

    // empty window: move on and restart the row offset
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->pos.rowId = 0;
      continue;
    }

    int32_t rows = pWin->numOfRows - pGroupResInfo->pos.rowId;
    int32_t rowOffset = pGroupResInfo->pos.rowId;

    if (rows > pQuery->rec.capacity - copied) {
      // the remaining output space cannot hold the whole window: copy what fits and remember
      // the row at which to resume
      rows = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->pos.rowId += rows;
    } else {
      // the window is consumed completely: the next call starts with the following window
      pGroupResInfo->pos.rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *pPage = getResBufPage(pEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * bytes;
      char *src = getPosInResultPage(pEnv, col, pWin, pPage);
      memcpy(dst, src + rowOffset * bytes, bytes * rows);
    }

    copied += rows;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4006
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4007
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4008

4009
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4010
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4011

4012
  pQuery->rec.rows += numOfResult;
4013

4014
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4015 4016
}

H
Haojun Liao 已提交
4017
/*
 * For non-interval queries, raise numOfRows of every window result in the runtime environment to
 * the largest numOfRes reported by its output columns. Columns produced by TSDB_FUNC_TS,
 * TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ are ignored. Interval queries are left untouched.
 *
 * @param pRuntimeEnv  runtime environment holding the window results and function contexts
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pInfo->size; ++w) {
    SWindowResult *pResult = &pInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;
      bool    ignored = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

      if (!ignored) {
        pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4039
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4040
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4041
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4042
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4043

4044
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4045
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4046

H
Haojun Liao 已提交
4047
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4048
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4049
  } else {
4050
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4051 4052 4053
  }
}

H
Haojun Liao 已提交
4054
/*
 * Tell whether a table query still has results that were not returned to the client.
 *
 * - If a positive limit has been reached, nothing remains.
 * - Fill queries (fill type set, not a point interpolation query): rows remain if the fill
 *   operator still holds rows, or, once the query is completed, if filling up to the end of the
 *   query window would still generate rows. While the query is not completed, the gap between two
 *   result blocks is handled after the next data block has been retrieved, so nothing remains.
 * - Other queries: rows remain if the query is completed, it is a group-by-normal-column or
 *   interval query, and window results are still held in the runtime environment.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return true if more results are pending
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillRequired = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillRequired) {
    // results are waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // rows already handed to the fill operator must be filled and returned first
  int32_t pending = taosNumOfRemainRows(pFillInfo);
  if (pending > 0) {
    return true;
  }

  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  // the scan is over: check whether the tail of the query window still has to be filled
  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the current query result into a response buffer and update the query status.
 *
 * Layout written to data:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer (htonl);
 *   3. one STableIdInfo per entry, with uid/key converted by htobe64 and tid by htonl.
 *
 * The write cursor advances by arbitrary byte counts in step 1, so its alignment is unknown when
 * steps 2 and 3 run. Those values are therefore assembled in properly aligned locals and copied
 * with memcpy instead of being stored through a casted pointer.
 *
 * Afterwards, if the query is completed, the status is switched to QUERY_OVER when the super table
 * query is over or, for a table query, when no result remains.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy per column
 * @param data       destination buffer; the caller must provide enough space for the whole layout
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(int32_t));
  data += sizeof(int32_t);

  for (int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4131
/*
 * Generate filled result rows into pQuery->sdata and apply the pending offset to them.
 *
 * Each round asks the fill operator for up to pQuery->rec.capacity rows:
 *  - with no pending offset the rows are returned as they are;
 *  - if the pending offset is smaller than the number of generated rows, the leading rows are
 *    dropped by shifting the remainder to the start of every pDst page, the offset is cleared and
 *    the remaining count is returned;
 *  - otherwise the whole batch is discarded, the offset is reduced accordingly and another round
 *    starts, unless no result remains, in which case 0 is returned.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         output pages, one per output column, whose contents are shifted on offset
 * @param numOfFilled  not used by this function
 * @return number of rows available in the output after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset left to consume: hand the generated rows back immediately
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset >= ret) {
      // the whole batch is swallowed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
          pQuery->limit.offset - ret);

      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

    ret -= (int32_t)pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    // todo refactor move to the beginning of buffer
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
              ret * pQuery->pSelectExpr[col].bytes);
    }

    pQuery->limit.offset = 0;
    return ret;
  }
}

4176
/*
 * Add the first-stage merge time to the elapsed time of the query and write the cost summary
 * (timings, block/row counters, internal buffer size, number of time windows) to the debug log.
 *
 * @param pQInfo  query handle whose runtime summary is reported
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // the merge time is accounted as part of the total elapsed time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: internal size:%"PRId64"B, numOfWin:%"PRId64, pQInfo, pCost->internalSupSize,
      pCost->numOfTimeWindows);
}

4192 4193
/*
 * Consume the remaining offset inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped: lastKey of the
 * current table is moved one step past the block edge in scan direction and the offset is cleared.
 * Otherwise pQuery->pos is set to the first row after the skipped ones, lastKey is set to the
 * timestamp of that row, the offset is cleared and the query functions are applied to the block.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   description of the block in which the offset ends
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset swallows the block exactly: skip it as a whole
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;
    pCurrent->lastKey = edge + step;
    pQuery->limit.offset = 0;
    return;
  }

  // position on the first row that is not skipped, counted from the scan start of the block
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // column 0 is read as the array of row timestamps
  TSKEY *tsList = (TSKEY *) pTsCol->pData;

  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4227

4228 4229 4230 4231 4232
/*
 * Skip data blocks until the pending offset of the query is consumed.
 *
 * Nothing is done when there is no positive offset or when column filters are present. Blocks
 * holding fewer rows than the remaining offset are skipped entirely (lastKey of the current table
 * is moved one step past the block edge); the block in which the offset ends is handed to
 * updateOffsetVal(). A killed query or an error left in terrno aborts through longjmp.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the offset ends inside (or exactly at the end of) this block
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is skipped
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY edge = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey = edge + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4267

H
Haojun Liao 已提交
4268
/*
 * Consume the pending offset of an interval query by stepping over time windows.
 *
 * *start is initialized with lastKey of the current table. When there is no positive offset, or a
 * column filter, a ts buffer or fill info is present, the start position is left alone.
 *
 * Otherwise data blocks are scanned. The offset is decreased by one for every time window that
 * ends within the current block (with respect to the scan direction). Because holes may exist
 * between windows, the skipped time span cannot be computed as interval * offset; the windows are
 * walked one by one instead. Once the offset reaches zero:
 *  - if the next window still overlaps the current block, the block is loaded, the query start is
 *    reset to the first row of that window, and the query functions are applied to the block;
 *  - otherwise the query start is set to the start of the next window.
 * An error left in terrno after the scan aborts through longjmp.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        out: start timestamp from which the query proceeds
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter (or ts buffer / fill), do NOT forward query start position
  bool skippable = (pQuery->limit.offset > 0) && (pQuery->numOfFilterCols <= 0) &&
                   (pRuntimeEnv->pTSBuf == NULL) && (pRuntimeEnv->pFillInfo == NULL);
  if (!skippable) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pResInfo->startTime = pQuery->window.skey;
      pResInfo->prevSKey = aligned.skey;
    } else if (pResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);

      pResInfo->startTime = aligned.skey;
      pResInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pResInfo, pResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // a window that ends inside the current block counts as one skipped result
      bool winEndsInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (win.ekey <= blockInfo.window.ekey)
                                                       : (win.ekey >= blockInfo.window.skey);
      if (winEndsInBlock) {
        pQuery->limit.offset -= 1;
        pResInfo->prevSKey = win.skey;
      }

      STimeWindow next = win;
      GET_NEXT_TIMEWINDOW(pQuery, &next);

      bool offsetDone = (pQuery->limit.offset == 0);
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (next.skey <= blockInfo.window.ekey)
                                                    : (next.ekey >= blockInfo.window.skey);

      if (!nextInBlock) {
        if (offsetDone) {
          // the query proceeds from the next window, which lies beyond this block
          *start = next.skey;
          pQuery->window.skey = next.skey;
          pResInfo->prevSKey = next.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still overlaps the current data block: load the block and find the
       * start position of the next queried time window in its timestamp column (column 0).
       * TODO: optimize, only the primary timestamp column is required here.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      next = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      TSKEY startKey = ((TSKEY *)pTsCol->pData)[startPos];

      if (!offsetDone) {
        pCurrent->lastKey = startKey;
        pResInfo->prevSKey = next.skey;
        win = next;
        continue;
      }

      // reset the query start timestamp
      pCurrent->win.skey = startKey;
      pQuery->window.skey = pCurrent->win.skey;
      *start = pCurrent->win.skey;

      pResInfo->prevSKey = next.skey;
      int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
      pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

      return true;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

B
Bomin Zhang 已提交
4394
/*
 * Create the TSDB query handle of the query and store it in the runtime environment.
 *
 * No handle is created for tag-only queries, nor for super table queries that are neither
 * interval queries nor fixed-output queries. For a plain ascending scan of a single normal table
 * the time window of that table replaces the query window in the condition. Depending on the
 * query type, the handle is obtained from tsdbQueryLastRow(), tsdbQueryRowsInExternalWindow() or
 * tsdbQueryTables(); for first/last row queries the query window and the window/lastKey of every
 * table are afterwards reset from the condition window.
 *
 * @param tsdb           TSDB repository handle
 * @param pQInfo         query handle
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS, or the value of terrno after the handle creation call
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order   = pQuery->order.order,
    .colList = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  bool plainSingleTableScan = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pEnv));

  if (plainSingleTableScan) {
    SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    } else {
      pEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    }

    return terrno;
  }

  pEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  // update the query time window, and propagate it to every table of every group
  pQuery->window = cond.twindow;

  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, t);

      pTableInfo->win = pQuery->window;
      pTableInfo->lastKey = pTableInfo->win.skey;
    }
  }

  return terrno;
}

4454 4455 4456
/*
 * Build the column description array used by the fill operator, one entry per output expression.
 *
 * Each entry receives the byte width, type and function id of its expression, the fill value from
 * pQuery->fillVal, the TSDB_COL_NORMAL flag, and its byte offset within a row (the running sum of
 * the widths of the preceding columns).
 *
 * @param pQuery  query whose select expressions are described
 * @return array of pQuery->numOfOutput entries allocated with calloc, or NULL if the allocation
 *         fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t rowOffset = 0;
  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo    *pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo *pCol = &pFillCol[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    rowOffset += pExpr->bytes;
  }

  return pFillCol;
}

4479
/*
 * Initialize the runtime state of a query before execution.
 *
 * Steps, in order:
 *  1. read the timestamp precision from the TSDB configuration and cache query properties
 *     (top/bottom query, tag output) in the runtime environment;
 *  2. create the TSDB query handle (setupQueryHandle());
 *  3. attach tsdb, vgId and the optional ts buffer, and set the traverse order of the ts buffer;
 *  4. set up the runtime environment (setupQueryRuntimeEnv());
 *  5. create the disk-based result buffer and the window result info where the query type needs
 *     them (super table queries other than tag-only ones; group-by-normal-column or interval
 *     queries on tables);
 *  6. create the fill info for fill queries that are not point interpolation queries;
 *  7. mark the query as QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query handle to initialize
 * @param pTsBuf         optional ts buffer, may be NULL
 * @param tsdb           TSDB repository handle
 * @param vgId           vnode group id stored in the query handle
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS, the error code of the failing step, or TSDB_CODE_QRY_OUT_OF_MEMORY
 *         if the fill column description cannot be allocated. On error, resources created by the
 *         earlier steps are left in pQInfo and are not released here.
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    if (pColInfo == NULL) {
      // the column description could not be allocated; do not hand NULL to the fill operator
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    // the window bounds are passed in ascending order regardless of the scan direction
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              &pQuery->interval, pQuery->precision, pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4585
/*
 * Reset the `complete` flag of the result info attached to every output
 * function context. Callers invoke this before moving on to the next table.
 *
 * @param pRuntimeEnv  runtime environment that owns the function contexts
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    ++col;

    if (pInfo == NULL) {
      continue;  // nothing attached to this context
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612
/*
 * Prepare the execution environment for one data block of a table.
 *
 * Non-interval query: the execution context is bound to the group of the
 * table, using the key that follows the end of the block in scan direction.
 * Interval query: the interval query range is set from the first key of the
 * block and, when tag results or a ts buffer are involved, the additional
 * per-table information is set as well.
 *
 * @param pQInfo           query handle
 * @param pTableQueryInfo  query state of the table that owns the block
 * @param pBlockInfo       description of the block about to be processed
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
    return;
  }

  // interval query
  setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

  if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
}

H
Haojun Liao 已提交
4613
/*
 * Iterate over every data block delivered by the active tsdb query handle and
 * apply the query functions to it.
 *
 * The primary handle is used while the master scan flag is set, the secondary
 * handle otherwise. For each block the owning table is looked up by tid in
 * pQInfo->tableqinfoGroupInfo.map and installed as pQuery->current. The loop
 * stops early when that lookup fails or when the block cannot be loaded.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when the query has been killed
 * or when terrno is set once the loop has finished.
 *
 * @param pQInfo  query handle
 * @return        time spent in this function, in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT handle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    STableQueryInfo **pTableQueryInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (pTableQueryInfo == NULL) {
      break;  // the tid of this block is not registered for the query
    }

    pQuery->current = *pTableQueryInfo;

    // sanity check: the per-table window must lie inside the query window and
    // follow the scan direction
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert(
          ((*pTableQueryInfo)->win.skey <= (*pTableQueryInfo)->win.ekey) &&
          ((*pTableQueryInfo)->lastKey >= (*pTableQueryInfo)->win.skey) &&
          ((*pTableQueryInfo)->win.skey >= pQuery->window.skey && (*pTableQueryInfo)->win.ekey <= pQuery->window.ekey));
    } else {
      assert(
          ((*pTableQueryInfo)->win.skey >= (*pTableQueryInfo)->win.ekey) &&
          ((*pTableQueryInfo)->lastKey <= (*pTableQueryInfo)->win.skey) &&
          ((*pTableQueryInfo)->win.skey <= pQuery->window.skey && (*pTableQueryInfo)->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *pTableQueryInfo, &blockInfo);
    }

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t loadCode = loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis, &pDataBlock, &status);
    if (loadCode != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is not needed: only move lastKey past it in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startMs;
}

4689 4690
/*
 * Set up the runtime environment to query a single table of group 0.
 *
 * The query status is reset to QUERY_NOT_COMPLETED, any existing primary
 * query handle is released and a new one covering only the selected table is
 * opened for the range [lastKey, win.ekey] of that table. When a ts buffer is
 * attached, its cursor is positioned as well. Finally the output buffers of
 * the function contexts are initialised.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when the query handle
 * cannot be created.
 *
 * @param pQInfo  query handle
 * @param index   position of the table inside group 0
 * @return        false when the ts buffer holds no data for the tag value of
 *                the table, true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *tableGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(tableGroup, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId *tableId = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         tableId->uid, tableId->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list that holds exactly one group with one table
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableList = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo keyInfo = {.pTable = pTableInfo->pTable, .lastKey = pTableInfo->lastKey};
  taosArrayPush(tableList, &keyInfo);
  taosArrayPush(groupList, &tableList);

  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = groupList};

  // drop the handle of the previously queried table, if any
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable, pQInfo);

  // the temporary lists are released regardless of the outcome
  taosArrayDestroy(tableList);
  taosArrayDestroy(groupList);

  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pRuntimeEnv->pCtx[0].tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4759
static void sequentialTableProcess(SQInfo *pQInfo) {
4760
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4761
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4762
  setQueryStatus(pQuery, QUERY_COMPLETED);
4763

H
Haojun Liao 已提交
4764
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4765

H
Haojun Liao 已提交
4766
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4767 4768
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4769

4770
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4771
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4772

S
TD-1057  
Shengliang Guan 已提交
4773
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4774
             numOfGroups, group);
H
Haojun Liao 已提交
4775 4776 4777 4778 4779 4780 4781

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4782 4783
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4784 4785 4786
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4787

H
Haojun Liao 已提交
4788 4789 4790 4791 4792 4793 4794
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4795

4796
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4797
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4798
      } else {
H
Haojun Liao 已提交
4799
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4800
      }
B
Bomin Zhang 已提交
4801 4802 4803 4804 4805 4806

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4807

H
Haojun Liao 已提交
4808
      initCtxOutputBuf(pRuntimeEnv);
4809

4810
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4811
      assert(taosArrayGetSize(s) >= 1);
4812

4813
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4814 4815 4816
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4817

dengyihao's avatar
dengyihao 已提交
4818
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4819

H
Haojun Liao 已提交
4820
      // here we simply set the first table as current table
4821 4822 4823
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4824
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4825

H
Haojun Liao 已提交
4826 4827 4828 4829 4830
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4831

H
Haojun Liao 已提交
4832 4833 4834 4835 4836
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4837 4838 4839 4840 4841 4842

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4843
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4844
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4845
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4846

S
TD-1057  
Shengliang Guan 已提交
4847
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4848 4849 4850 4851 4852 4853 4854

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4855 4856
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4869
      // no need to update the lastkey for each table
4870
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4871

B
Bomin Zhang 已提交
4872 4873
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4874 4875 4876
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4877

4878
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4879 4880
      assert(taosArrayGetSize(s) >= 1);

4881
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4882 4883 4884 4885 4886 4887 4888 4889

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
4890
      taosArrayDestroy(s);
4891 4892 4893 4894 4895
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
4896
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
4897 4898 4899

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
4900
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes));
4901 4902 4903
        }
      }

4904
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4905 4906 4907 4908 4909 4910 4911
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4912
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4913 4914 4915 4916 4917 4918

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4919 4920 4921
    }
  } else {
    /*
4922
     * 1. super table projection query, 2. ts-comp query
4923 4924 4925
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4926
    if (pQInfo->groupIndex > 0) {
4927
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4928
      pQuery->rec.total += pQuery->rec.rows;
4929

4930
      if (pQuery->rec.rows > 0) {
4931 4932 4933
        return;
      }
    }
4934

4935
    // all data have returned already
4936
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4937 4938
      return;
    }
4939

4940 4941
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4942

H
Haojun Liao 已提交
4943
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4944 4945
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4946

4947
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4948
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4949
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4950
      }
4951

4952
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4953
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4954
        pQInfo->tableIndex++;
4955 4956
        continue;
      }
4957

H
hjxilinx 已提交
4958
      // TODO handle the limit offset problem
4959
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4960 4961
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4962 4963 4964
          continue;
        }
      }
4965

4966
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4967
      skipResults(pRuntimeEnv);
4968

4969
      // the limitation of output result is reached, set the query completed
4970
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
4971
        SET_STABLE_QUERY_OVER(pQInfo);
4972 4973
        break;
      }
4974

4975 4976
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4977

4978
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4979 4980 4981 4982 4983 4984
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4985
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4986

H
Haojun Liao 已提交
4987
        STableIdInfo tidInfo = {0};
4988

H
Haojun Liao 已提交
4989 4990 4991
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4992
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4993 4994
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4995
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
4996
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
4997 4998
          break;
        }
4999

5000
      } else {
5001
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5002 5003
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5004 5005
          continue;
        } else {
5006 5007 5008
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5009 5010 5011
        }
      }
    }
H
Haojun Liao 已提交
5012

5013
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5014 5015
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5016
  }
5017

5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5030
    finalizeQueryResult(pRuntimeEnv);
5031
  }
5032

5033 5034 5035
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5036

5037
  qDebug(
S
TD-1057  
Shengliang Guan 已提交
5038
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%" PRIzu ", %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
5039
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5040
      pQuery->limit.offset);
5041 5042
}

5043 5044 5045 5046
/*
 * Switch the runtime environment of a multi-table query to the reverse scan.
 *
 * The scan flag is set to REVERSE_SCAN, the query time window is swapped and
 * the scan order is flipped (the ts buffer cursor, if present, follows the new
 * order). The function contexts are switched and prepared for the reverse
 * scan, and a secondary query handle is opened over all table groups using the
 * swapped window.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when the secondary handle
 * cannot be created.
 *
 * @param pQInfo  query handle
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer right away so that no stale handle
  // is visible while the calls below run, as done for pQueryHandle elsewhere
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5080 5081 5082 5083
/*
 * Undo the changes made for the reverse scan and return to the master scan.
 *
 * The query time window is swapped back, the scan order of the query and of
 * the ts buffer cursor (if present) is flipped back, the function contexts are
 * switched again and the scan flag is set to MASTER_SCAN.
 *
 * @param pQInfo  query handle
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  // doSaveContext flips the query order together with the window; flip it
  // back here as well so that window, query order and ts buffer order agree
  // NOTE(review): confirm that no other code restores pQuery->order.order after the reverse scan
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pRuntimeEnv);
  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
}

5094 5095 5096
/*
 * Close all time windows once a scan is finished.
 *
 * For an interval query the window results of every table in every table
 * group are closed; otherwise only the window results kept in the runtime
 * environment (the group results) are closed.
 *
 * @param pQInfo  query handle
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = GET_TABLEGROUP(pQInfo, g);

    size_t tableCount = taosArrayGetSize(tables);
    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTable = taosArrayGetP(tables, t);
      closeAllTimeWindow(&pTable->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables.
 *
 * When pQInfo->groupIndex is larger than 0 the scan has already been done and
 * only the remaining results are copied into the output buffer. Otherwise the
 * master scan is performed, all time windows are closed, an optional reverse
 * scan follows, and the results are merged/copied into the output buffer.
 *
 * Leaves through longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED)
 * when the query was killed or pQInfo->code reports an error.
 * NOTE(review): the jump code is QUERY_CANCELLED even when pQInfo->code holds
 * a different error - confirm that this is intended.
 *
 * @param pQInfo  query handle
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * groupIndex > 0 means the query process has been completed already, only
     * the data needs to be copied into the output buffer
     */
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan over all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // same check as after the master scan
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery)) {
    // results are copied only when merging them into group results succeeds
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {  // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5193
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5194
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5195

H
hjxilinx 已提交
5196
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5197
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5198 5199
    return;
  }
5200

H
hjxilinx 已提交
5201
  pQuery->current = pTableInfo;  // set current query table info
5202

5203
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5204
  finalizeQueryResult(pRuntimeEnv);
5205

H
Haojun Liao 已提交
5206
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5207 5208
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5209
  }
5210

H
Haojun Liao 已提交
5211
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5212
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5213

5214
  skipResults(pRuntimeEnv);
5215
  limitResults(pRuntimeEnv);
5216 5217
}

H
hjxilinx 已提交
5218
/*
 * Handler for single table queries that may produce many output rows.
 *
 * Blocks are skipped first (see skipBlocks); when an offset remains and no
 * filter columns exist, the query is marked completed right away. Otherwise
 * the table is scanned repeatedly, starting at the last key reached, until
 * rows are available or the query is completed. Afterwards the limit is
 * applied. When the query is completed, the table id together with the last
 * key is appended to pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query handle
 * @param pTableInfo  query state of the table to scan
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialization is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // zero-initialise the record so that no indeterminate bytes are copied
    // into the array (sequentialTableProcess builds the same record this way)
    STableIdInfo tidInfo = {0};
    STableId* id = TSDB_TABLEID(pQuery->current->pTable);

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5278
/*
 * Scan one table for an interval query until the query is either completed
 * or the result buffer is full.
 *
 * After each scan the results are finalized. When an offset is pending, no
 * fill is requested, and either filter columns exist or a ts buffer is
 * attached, the offset is consumed by dropping closed time windows from the
 * front of the window result list.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        key passed to every scan
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool rowsMayBeDropped = (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (rowsMayBeDropped && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // never skip more windows than are closed, nor more than the offset
      int32_t toSkip = (int32_t)(MIN(closedWindows, pQuery->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, toSkip);
      pQuery->limit.offset -= toSkip;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5305
// handle time interval query on table
H
hjxilinx 已提交
5306
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5307 5308
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5309 5310
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5311

H
Haojun Liao 已提交
5312
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5313
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5314

5315
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5316
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5317
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5318 5319 5320 5321
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5322
  while (1) {
H
Haojun Liao 已提交
5323
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5324

H
Haojun Liao 已提交
5325
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5326
      pQInfo->groupIndex = 0;  // always start from 0
5327
      pQuery->rec.rows = 0;
5328
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5329

5330
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5331
    }
5332

5333
    // the offset is handled at prepare stage if no interpolation involved
5334
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5335
      limitResults(pRuntimeEnv);
5336 5337
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5338
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5339
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5340
      numOfFilled = 0;
5341

H
Haojun Liao 已提交
5342
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5343
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5344
        limitResults(pRuntimeEnv);
5345 5346
        break;
      }
5347

5348
      // no result generated yet, continue retrieve data
5349
      pQuery->rec.rows = 0;
5350 5351
    }
  }
5352

5353
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5354
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5355
    pQInfo->groupIndex = 0;
5356
    pQuery->rec.rows = 0;
5357
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5358
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5359 5360 5361
  }
}

5362 5363 5364 5365
/*
 * Execute one round of a single-table query.
 *
 * If results remain from a previous round they are returned first: through the
 * fill routine when a fill type is set, otherwise by copying pending window
 * results. Only when nothing was returned this way is a new scan started with
 * the processor matching the query kind (interval / fixed output / multi output).
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResForTableQuery(pRuntimeEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * There are remain results that are not returned due to result interpolation
       * So, we do keep in this procedure instead of launching retrieve procedure for next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pRuntimeEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    // no fill: drain the pending window results, if any
    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pRuntimeEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // there are not data remains
        if (pRuntimeEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray*          pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pTableQueryInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTableQueryInfo);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, pTableQueryInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQueryInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5427
/*
 * Execute one round of a super-table query.
 *
 * Interval queries, and fixed-output queries that are neither point
 * interpolation nor group-by-normal-column, go through
 * multiTableQueryProcess(); everything else is processed table by table via
 * sequentialTableProcess(). The elapsed time is added to the query summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  pQuery->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) && (!pRuntimeEnv->groupbyNormalCol));

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5448
/*
 * Locate the source column referenced by an expression.
 *
 * Tag columns are looked up by column id in pTagCols (the tbname pseudo column
 * yields TSDB_TBNAME_COLUMN_INDEX), user-defined columns yield
 * TSDB_UD_COLUMN_INDEX, and all other columns are looked up in
 * pQueryMsg->colList. A failed lookup trips assert(0) and returns -1.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  SColIndex *pColIndex = &pExprMsg->colInfo;

  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
    if (pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t t = 0; t < pQueryMsg->numOfTags; ++t) {
      if (pColIndex->colId == pTagCols[t].colId) {
        return t;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pColIndex->flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t c = 0; c < pQueryMsg->numOfCols; ++c) {
      if (pColIndex->colId == pQueryMsg->colList[c].colId) {
        return c;
      }
    }
  }

  // the referenced column does not exist in the message
  assert(0);
  return -1;
}

5479 5480 5481
/*
 * Return true when the index resolved by getColumnIndexInSource() is smaller
 * than the number of columns or smaller than the number of tags in the message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5484
/*
 * Sanity check of the already byte-order-converted header fields of a query
 * message: interval must not be negative, at least one table is required, the
 * number of group-by columns must not be negative, and the number of output
 * columns must be within [1, TSDB_MAX_COLUMNS]. The first violation is logged
 * and false is returned.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
    return false;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  bool validNumOfOutput = (pQueryMsg->numOfOutput > 0) && (pQueryMsg->numOfOutput <= TSDB_MAX_COLUMNS);
  if (!validNumOfOutput) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/*
 * Check the number of source columns and tags carried by the message.
 *
 * Negative counts, or a total above TSDB_MAX_COLUMNS, are rejected. When the
 * message carries neither columns nor tags, every output expression must be a
 * TSDB_FUNC_TAGPRJ, or a TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT on the tbname
 * pseudo column; otherwise false is returned.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only expressions that need none are acceptable
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    bool onTbname = (pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool needNoSourceCol = (pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
                           (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                           (pFuncMsg->functionId == TSDB_FUNC_COUNT && onTbname);
    if (!needNoSourceCol) {
      return false;
    }
  }

  return true;
}

5530
/*
 * Build the table id list from the STableIdInfo records stored at pMsg.
 *
 * Each record is byte-order converted in place inside the message buffer and
 * then appended to a newly created array that is handed back through
 * pTableIdList. Returns the position right after the last record.
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pIdInfo = (STableIdInfo *)pMsg;
  for (int32_t i = 0; i < pQueryMsg->numOfTables; ++i, ++pIdInfo) {
    pIdInfo->tid = htonl(pIdInfo->tid);
    pIdInfo->uid = htobe64(pIdInfo->uid);
    pIdInfo->key = htobe64(pIdInfo->key);

    taosArrayPush(*pTableIdList, pIdInfo);
  }

  return (char *)pIdInfo;
}
5548

5549
/**
H
hjxilinx 已提交
5550
 * pQueryMsg->head has been converted before this function is called.
5551
 *
H
hjxilinx 已提交
5552
 * @param pQueryMsg
5553 5554 5555 5556
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5557
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5558
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5559 5560
  int32_t code = TSDB_CODE_SUCCESS;

5561 5562 5563 5564
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5565 5566 5567 5568 5569 5570
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5571 5572
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5573

5574 5575
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5576
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5577
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5578 5579

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5580
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5581
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5582 5583 5584
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5585
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5586
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5587
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5588

5589
  // query msg safety check
5590
  if (!validateQueryMsg(pQueryMsg)) {
5591 5592
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5593 5594
  }

H
hjxilinx 已提交
5595 5596
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5597 5598
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5599
    pColInfo->colId = htons(pColInfo->colId);
5600
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5601 5602
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5603

H
hjxilinx 已提交
5604
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5605

H
hjxilinx 已提交
5606
    int32_t numOfFilters = pColInfo->numOfFilters;
5607
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5608
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5609 5610 5611 5612
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5613 5614 5615
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5616
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5617

5618 5619
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5620 5621 5622

      pMsg += sizeof(SColumnFilterInfo);

5623 5624
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5625

5626
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5627 5628 5629 5630 5631
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5632
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5633
        pMsg += (pColFilter->len + 1);
5634
      } else {
5635 5636
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5637 5638
      }

5639 5640
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5641 5642 5643
    }
  }

5644
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5645 5646 5647 5648 5649
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5650
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5651

5652
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5653
    (*pExpr)[i] = pExprMsg;
5654

5655
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5656 5657 5658 5659
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5660

5661
    pMsg += sizeof(SSqlFuncMsg);
5662 5663

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5664
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5665 5666 5667 5668
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5669
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5670 5671 5672 5673 5674
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5675 5676
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5677
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5678 5679
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5680 5681
      }
    } else {
5682
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5683
//        return TSDB_CODE_QRY_INVALID_MSG;
5684
//      }
5685 5686
    }

5687
    pExprMsg = (SSqlFuncMsg *)pMsg;
5688
  }
5689

5690
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5691
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5692
    goto _cleanup;
5693
  }
5694

H
hjxilinx 已提交
5695
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5696

H
hjxilinx 已提交
5697
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5698
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5699 5700 5701 5702
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5703 5704 5705

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5706
      pMsg += sizeof((*groupbyCols)[i].colId);
5707 5708

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5709 5710
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5711
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5712 5713 5714 5715 5716
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5717

H
hjxilinx 已提交
5718 5719
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5720 5721
  }

5722 5723
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5724
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5725 5726

    int64_t *v = (int64_t *)pMsg;
5727
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5728 5729
      v[i] = htobe64(v[i]);
    }
5730

5731
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5732
  }
5733

5734 5735
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5736 5737 5738 5739 5740
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5741 5742
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5743

5744 5745 5746 5747
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5748

5749
      (*tagCols)[i] = *pTagCol;
5750
      pMsg += sizeof(SColumnInfo);
5751
    }
H
hjxilinx 已提交
5752
  }
5753

5754 5755 5756
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5757 5758 5759 5760 5761 5762

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5763 5764 5765
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5766

weixin_48148422's avatar
weixin_48148422 已提交
5767
  if (*pMsg != 0) {
5768
    size_t len = strlen(pMsg) + 1;
5769

5770
    *tbnameCond = malloc(len);
5771 5772 5773 5774 5775
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5776
    strcpy(*tbnameCond, pMsg);
5777
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5778
  }
5779

5780
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5781 5782
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5783
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5784
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5785 5786

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5787 5788

_cleanup:
S
Shengliang Guan 已提交
5789
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5790 5791
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5792 5793 5794 5795
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5796 5797

  return code;
5798 5799
}

H
hjxilinx 已提交
5800
/*
 * Build the expression tree of an arithmetic expression from the binary string
 * carried in its first argument and store it in pArithExprInfo->pExpr.
 *
 * Returns the code caught from the TRY block if building the tree fails that
 * way, TSDB_CODE_QRY_APP_ERROR if no tree was produced, TSDB_CODE_SUCCESS
 * otherwise.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  const char *exprStr = pArithExprInfo->base.arg[0].argValue.pz;

  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, exprStr);

  tExprNode* pExprNode = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pExprNode = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, exprStr, tstrerror(code));
    return code;
  } END_TRY

  if (pExprNode != NULL) {
    pArithExprInfo->pExpr = pExprNode;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, exprStr);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5821
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5822 5823
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5824
  int32_t code = TSDB_CODE_SUCCESS;
5825

H
Haojun Liao 已提交
5826
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5827
  if (pExprs == NULL) {
5828
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5829 5830 5831 5832 5833
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5834
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5835
    pExprs[i].base = *pExprMsg[i];
5836
    pExprs[i].bytes = 0;
5837 5838 5839 5840

    int16_t type = 0;
    int16_t bytes = 0;

5841
    // parse the arithmetic expression
5842
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5843
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5844

5845
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5846
        taosTFree(pExprs);
5847
        return code;
5848 5849
      }

5850
      type  = TSDB_DATA_TYPE_DOUBLE;
5851
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5852
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5853
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5854
      type = s.type;
H
Haojun Liao 已提交
5855
      bytes = s.bytes;
5856 5857
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5858 5859
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5860 5861
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5862 5863 5864 5865 5866

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5867
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5868
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5869

dengyihao's avatar
dengyihao 已提交
5870
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5871 5872 5873 5874
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5875
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5876

H
Haojun Liao 已提交
5877 5878 5879
        type  = s.type;
        bytes = s.bytes;
      }
5880 5881
    }

S
TD-1057  
Shengliang Guan 已提交
5882
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
5883
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5884
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5885
      taosTFree(pExprs);
5886
      return TSDB_CODE_QRY_INVALID_MSG;
5887 5888
    }

5889
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5890
      tagLen += pExprs[i].bytes;
5891
    }
5892
    assert(isValidDataType(pExprs[i].type));
5893 5894 5895
  }

  // TODO refactor
5896
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5897 5898
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5899

5900
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5901
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5902 5903 5904 5905 5906 5907 5908 5909 5910
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
5911 5912 5913
    }
  }

5914
  *pExprInfo = pExprs;
5915 5916 5917
  return TSDB_CODE_SUCCESS;
}

5918
/*
 * Create the group-by descriptor of a query from the message header and the
 * already extracted group-by column list.
 *
 * @param pQueryMsg  query message; numOfGroupCols, orderType and orderByIdx are read
 * @param pColIndex  array of numOfGroupCols group-by columns, copied into the result
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *                   left untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by columns or
 *         an allocation failed (distinguish the two through *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the allocation above
    taosTFree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5942
/*
 * Build pQuery->pFilterInfo from the per-column filters in pQuery->colList.
 *
 * Counts the columns that carry at least one filter (adding to
 * pQuery->numOfFilterCols), allocates one SSingleColumnFilterInfo per such
 * column and, for every filter, selects the filter callback: a range filter
 * when both a lower (> / >=) and an upper (< / <=) relation are present,
 * otherwise the value filter indexed by the single valid relation.
 *
 * @param pQInfo  only used as an identifier in log messages
 * @param pQuery  query whose filter information is created
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure,
 *         or TSDB_CODE_QRY_INVALID_MSG for an invalid filter definition
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  // i walks all columns, j only advances for columns that carry filters
  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      // plain struct assignment copies the whole column descriptor
      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // two-sided range: the slot depends on which bounds are inclusive
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            // a lower relation that is not part of a range must come alone
            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

6035
/**
 * Resolve the slot of the source column for every output expression.
 *
 * For each entry of pQuery->pSelectExpr, except TSDB_FUNC_ARITHM expressions which are skipped,
 * the column id stored in base.colInfo is searched in pQuery->colList (normal columns) or in
 * pQuery->tagColList (all remaining columns) and colInfo.colIndex is set to the matching position.
 * Columns whose id is <= TSDB_UD_COLUMN_INDEX are left untouched.
 *
 * @param pQuery query object; must not be NULL and must carry a select-expression list
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // validate the handle itself before it is dereferenced
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column referenced by an expression has to be part of the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is the only id allowed to be missing from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6072 6073
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6074 6075 6076
/**
 * Choose the capacity (in rows) of the output buffer and the fill threshold derived from it.
 *
 * Projection queries get enough rows to hold RESULT_MSG_MIN_SIZE bytes, but never fewer than
 * RESULT_MSG_MIN_ROWS; every other query uses a fixed 4096-row buffer. The threshold is 85% of
 * the chosen capacity. Both values are stored in pQuery->rec.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  if (!isProjQuery(pQuery)) {
    // in case of non-prj query, a smaller output buffer will be used.
    pQuery->rec.capacity = 4096;
    pQuery->rec.threshold = (int32_t)(pQuery->rec.capacity * RESULT_THRESHOLD_RATIO);
    return;
  }

  int32_t rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
  rows = (rows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : rows;

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

6093 6094
/**
 * Allocate a SQInfo together with its SQuery and fill both from a decoded query message.
 *
 * Ownership: on success the returned object references pGroupbyExpr, pExprs and pTagCols and
 * holds a copy of *pTableGroupInfo; freeQInfo() releases all of them. On failure everything that
 * was passed in is released here (directly, or through freeQInfo()) and NULL is returned, so the
 * caller must not free these arguments again.
 *
 * @param pQueryMsg       decoded query message (column list, limits, window, fill values, ...)
 * @param pGroupbyExpr    group-by description, may be NULL
 * @param pExprs          output expressions, pQueryMsg->numOfOutput entries
 * @param pTableGroupInfo tables to query, arranged in groups
 * @param pTagCols        tag column descriptions
 * @param stableQuery     forwarded to changeExecuteScanOrder()
 * @return the new query handle, or NULL if any allocation or the filter setup failed
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  // from here on the query object owns the expression/group-by/tag arguments, so that a later
  // failure only needs freeQInfo() to release them
  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // the list stores SColumnInfo entries (they are copied from the message below), so size it by
  // that element type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the shallow copy above shares the filter array with the message; replace it with a private clone
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    assert(pExprs[col].interBytes >= pExprs[col].bytes);

    // allocate additional memory for interResults that are usually larger then final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);

    // both containers are filled in the loop below; stop here instead of handing NULL to them.
    // The map is only required when there is at least one table to register.
    if (pQInfo->tableqinfoGroupInfo.pGroupList == NULL ||
        (pTableGroupInfo->numOfTables > 0 && pQInfo->tableqinfoGroupInfo.map == NULL)) {
      goto _cleanup;
    }
  }

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);

  // one contiguous slab that backs every STableQueryInfo created below
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(0, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pthread_mutex_init(&pQInfo->lock, NULL);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  int32_t index = 0;  // running slot number inside pQInfo->pBuf

  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    // register the group first so that freeQInfo() releases it if a later step fails
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // every table starts scanning from its own last key
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // no SQInfo exists yet, so the table group has to be destroyed through the caller's copy
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // the arguments were never attached to a SQuery; release them by hand
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  taosTFree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  taosTFree(pExprs);

_cleanup:
  // releases whatever has been attached to pQInfo so far; a NULL pQInfo is ignored
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6270
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6271 6272 6273 6274
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6275

H
hjxilinx 已提交
6276 6277 6278 6279
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6280
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6281 6282 6283
  return (sig == (uint64_t)pQInfo);
}

6284
/**
 * Finish the setup of a freshly created query handle.
 *
 * Two situations complete the query immediately with TSDB_CODE_SUCCESS: an empty time range for
 * the requested scan order, and a handle without any qualified table. Otherwise the optional TS
 * buffer carried by the message is materialized and handed to doInitQInfo().
 *
 * @param pQueryMsg decoded query message; tsLen/tsOffset/tsNumOfBlocks/tsOrder describe the TS block
 * @param tsdb      storage handle forwarded to doInitQInfo()
 * @param vgId      vnode id forwarded to doInitQInfo()
 * @param pQInfo    handle to initialize; it is freed here when doInitQInfo() fails
 * @param isSTable  forwarded to doInitQInfo()
 * @return TSDB_CODE_SUCCESS, or the error code reported by doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // Build the TS buffer only now: the early exits above never hand it to anybody, so creating
  // it before them would leave it unreleased on those paths.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337
/**
 * Release an array of column filters.
 *
 * For every entry flagged as a string filter (filterstr set) the buffer referenced by pz is freed
 * first; afterwards the array itself is freed. A NULL array is ignored.
 *
 * @param pFilter      filter array, may be NULL
 * @param numOfFilters number of entries in pFilter
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
    if (pFilter != NULL) {
      for (int32_t idx = 0; idx < numOfFilters; ++idx) {
        SColumnFilterInfo* pCur = &pFilter[idx];
        if (pCur->filterstr) {
          free((void*)(pCur->pz));
        }
      }

      free(pFilter);
    }
}

H
hjxilinx 已提交
6338 6339 6340 6341
/**
 * Destroy a query handle and everything attached to it.
 *
 * Pointers that do not pass isValidQInfo() (including NULL) are ignored. The runtime environment
 * is torn down first, then the SQuery with its buffers, expressions and column lists, then the
 * per-table query state, and finally the handle itself after its signature has been cleared.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    // output pages, one per result column
    if (pQuery->sdata != NULL) {
      int32_t col = 0;
      while (col < pQuery->numOfOutput) {
        taosTFree(pQuery->sdata[col]);
        col += 1;
      }

      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // filter descriptors attached to each filtered column
    for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
      SSingleColumnFilterInfo *pFilterCol = &pQuery->pFilterInfo[k];
      if (pFilterCol->numOfFilters > 0) {
        taosTFree(pFilterCol->pFilters);
      }
    }

    // output expressions; only some of them carry an expression tree
    if (pQuery->pSelectExpr != NULL) {
      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        SExprInfo *pOutExpr = &pQuery->pSelectExpr[k];
        if (pOutExpr->pExpr == NULL) {
          continue;
        }

        tExprTreeDestroy(&pOutExpr->pExpr, NULL);
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    // column descriptions together with their cloned filters
    if (pQuery->colList != NULL) {
      for (int32_t k = 0; k < pQuery->numOfCols; k++) {
        SColumnInfo *pCol = &pQuery->colList[k];
        freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
      }

      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  // todo refactor, extract method to destroytableDataInfo
  if (pQInfo->tableqinfoGroupInfo.pGroupList != NULL) {
    int32_t groupCount = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

      size_t tablesInGroup = taosArrayGetSize(pGroup);
      for(int32_t t = 0; t < tablesInGroup; ++t) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pGroup, t);
        destroyTableQueryInfo(pTableInfo);
      }

      taosArrayDestroy(pGroup);
    }
  }

  taosTFree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // clear the signature so the handle no longer passes isValidQInfo()
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6427
/**
 * Compute the number of payload bytes of the current result.
 *
 * Regular queries: rowSize * (*numOfRows). TS-comp queries with at least one row return the size
 * of the file named in the first output page instead, and *numOfRows is overwritten with that
 * size as well.
 *
 * @param pQInfo    query handle
 * @param numOfRows in: rows available; out: replaced by the file size for ts-comp queries
 * @return payload size in bytes, or 0 if the ts-comp file cannot be inspected
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  /*
   * get the file size and set the numOfRows to be the file size, since for tsComp query,
   * the returned row size is equalled to 1
   * TODO handle the case that the file is too large to send back one time
   */
  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6448

H
hjxilinx 已提交
6449 6450 6451
/**
 * Copy the current result into the response buffer and update the progress counters.
 *
 * TS-comp queries: the whole temporary file named in the first output page is read into `data`
 * and the file is removed afterwards. All other queries: the buffered rows are copied through
 * doCopyQueryResultToMsg(). In both cases rec.total grows by rec.rows, and the query is marked
 * QUERY_OVER once the limit has been reached exactly.
 *
 * I/O failures while loading the ts-comp file are logged; the function still returns
 * TSDB_CODE_SUCCESS, as there is no error channel to the client yet.
 *
 * @param pQInfo query handle
 * @param data   destination buffer; the caller sizes it (see getResultSize())
 * @return TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the size signed so that a failing lseek (-1) is recognizable
      int64_t total = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, total);
      if (total >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t done = 0;

        // read() may deliver fewer bytes than requested or be interrupted; continue until the
        // whole file has been transferred
        while (done < total) {
          ssize_t n = read(fd, data + done, (size_t)(total - done));
          if (n > 0) {
            done += n;
            continue;
          }

          if (n < 0 && errno == EINTR) {
            continue;
          }

          // todo return the error code to client
          if (n == 0) {
            qError("QInfo:%p ts-comp file truncated while reading, path:%s, read:%" PRId64 ", expected:%" PRId64, pQInfo,
                   pQuery->sdata[0]->data, done, total);
          } else {
            qError("QInfo:%p failed to read ts-comp data, path:%s, read:%" PRId64 ", expected:%" PRId64 ", reason:%s",
                   pQInfo, pQuery->sdata[0]->data, done, total, strerror(errno));
          }
          break;
        }
      } else {
        qError("QInfo:%p failed to get size of ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6501 6502 6503 6504 6505 6506 6507
/*
 * Management object for query handles. Only the definition is part of this section of the
 * file; the notes on vgId/closed/lock below are assumptions to be confirmed against the code
 * that uses the structure.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vnode this object belongs to -- TODO confirm
  bool            closed;         // NOTE(review): looks like a "no longer accepting handles" flag -- confirm
  pthread_mutex_t lock;           // NOTE(review): presumably guards the fields above -- confirm
} SQueryMgmt;

6508
/**
 * Build a query handle from a raw query message.
 *
 * The message is decoded, the output and group-by expressions are created, the set of tables is
 * resolved (single table, super table filtered by tag condition, or an explicit id list) and the
 * handle is created and initialized from these parts.
 *
 * @param tsdb      storage handle, must not be NULL
 * @param vgId      vnode id, forwarded to initQInfo()
 * @param pQueryMsg query message, must not be NULL; the column filters attached to it are
 *                  released before returning
 * @param pQInfo    out: the new handle on success, NULL on any failure
 * @return TSDB_CODE_SUCCESS or the code of the step that failed
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);

    // with assertions compiled out, reject the unknown query type instead of continuing with an
    // empty table group
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  // createQInfoImpl takes over the expression, group-by and tag-column objects: they end up
  // either inside the handle or released on failure, so the local pointers are dropped in both cases
  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // still owned here only if a step after the expression creation failed; the expression trees
  // hang off the array entries and have to be destroyed before the array itself
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }
    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  // the handle works on its own clones of the filters, the ones in the message are not needed any more
  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6634
/**
 * Public destructor of a query handle: prints the cost summary and frees the handle.
 * Handles that do not pass isValidQInfo() are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);

    // print the query cost summary
    queryCostStatis(pQInfo);
    freeQInfo(pQInfo);
  }
}

6645 6646 6647 6648 6649 6650 6651 6652 6653 6654
/**
 * Mark the result as ready and give up ownership of the handle.
 *
 * Under the handle lock dataReady is set to QUERY_RESULT_READY and the presence of a stored
 * response context is sampled. Afterwards the owner field, which must still hold the calling
 * thread's id, is reset to 0.
 *
 * @return true if a response context is stored in the handle (rspContext != NULL)
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  pthread_mutex_unlock(&pQInfo->lock);

  // clear qhandle owner
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  return hasRspContext;
}

6662
/**
 * Execute (or continue) the query behind a handle on the calling thread.
 *
 * The caller first has to become the owner of the handle; if another thread already owns it,
 * TSDB_CODE_QRY_IN_EXEC is recorded in the handle and false is returned. A killed query and a
 * query without tables finish immediately. Errors raised during execution arrive through
 * longjmp() on runtimeEnv.env and are recorded in pQInfo->code.
 *
 * @return false if the handle is owned by another thread, otherwise the value of
 *         doBuildResCheck()
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t self = taosGetPthreadId();

  // claim the handle: succeeds only while nobody owns it
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t ret = setjmp(pQInfo->runtimeEnv.env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // pick the executor that matches the kind of query
  if (onlyQueryTags(pQInfo->runtimeEnv.pQuery)) {
    assert(pQInfo->runtimeEnv.pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pQInfo->runtimeEnv.stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6718
/**
 * Ask whether a result can be built for a retrieve request.
 *
 * If the result is ready, *buildRes is set to true. Otherwise *buildRes is false and
 * pRspContext is stored in the handle.
 *
 * @param qinfo       query handle
 * @param buildRes    out: true if the result is ready (not written for an invalid handle)
 * @param pRspContext response context stored in the handle while the result is not ready
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise pQInfo->code
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);

  bool ready = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = ready;

  if (ready) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
  }

  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
6748

6749
/**
 * Build the retrieve response for the current result of a query.
 *
 * The response is allocated with rpcMallocCont(); its size covers the response header, the
 * result payload, one int32_t and one STableIdInfo per entry of arrTableIdInfo. The header
 * fields are written in network byte order. Rows are copied only when there are rows and the
 * query has no error; otherwise the query is marked QUERY_OVER.
 *
 * @param qinfo        query handle
 * @param pRsp         out: the allocated response
 * @param contLen      out: length of the response in bytes
 * @param continueExec out: true if the query has more results to produce
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or pQInfo->code
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload, followed by an int32_t and the table id records
  size_t bodyLen = getResultSize(pQInfo, &pQuery->rec.rows);
  bodyLen += sizeof(int32_t) + sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(bodyLen + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  *pRsp = rsp;
  if (rsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a query without error
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    rsp->offset = htobe64(pQuery->limit.offset);
  } else {
    rsp->offset = 0;
  }
  rsp->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);

  rsp->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  // the pending request, if any, is answered by this response
  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  bool finished = IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
  if (finished) {
    *continueExec = false;
    rsp->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6802

6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813
/**
 * Tell whether a query has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1 if the query was
 *         killed or carries the QUERY_OVER status, else 0
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return 1;
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  return Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6814
/**
 * Flag a query as killed and wait until no thread owns the handle any more.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise TSDB_CODE_SUCCESS
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread being stopped.
  // Once the query is stopped, the owner of qHandle will be cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847
/*
 * Write one tag value into a result-buffer slot.
 *
 * output: destination slot in the result buffer
 * val:    source value, or NULL when the tag has no value
 * type:   TSDB data type of the value
 * bytes:  width used for non-BINARY/NCHAR types
 *
 * BINARY and NCHAR values are copied with their length prefix
 * (varDataTLen(val) bytes); every other type copies exactly `bytes` bytes.
 * A NULL `val` is stored as the type-specific NULL representation.
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarLenType = (type == TSDB_DATA_TYPE_BINARY) || (type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLenType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLenType) {
    memcpy(output, val, varDataTLen(val));
  } else {
    // NOTE(review): the original carried the remark "todo here stop will cause
    // client crash" on this copy; its meaning is not evident from this function.
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
6848 6849 6850
/*
 * Produce the result rows of a tag-only query directly from the table list.
 *
 * Nothing is done when the query has no table group; otherwise exactly one
 * group is expected (asserted). The function of the first select expression
 * selects one of three output modes:
 *
 *   - TSDB_FUNC_TID_TAG: one var-data row per table in output column 0. The
 *     payload is a 2-byte zero, the table uid, the table tid and the vgId,
 *     followed by either the table name or the requested tag value.
 *   - TSDB_FUNC_COUNT:   a single row holding the number of tables in the group.
 *   - anything else:     one row per table with the table name / tag value of
 *     every non user-defined output column, honouring limit.offset and
 *     limit.limit.
 *
 * The scan starts at pQInfo->tableIndex and advances it for every visited
 * table. On return pQuery->rec.rows holds the number of rows written and the
 * query status is set to QUERY_COMPLETED.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;

    // default to the expression's own type/width, then prefer the definition
    // found in the tag column list when the column id matches
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The fields below land on offsets that are not naturally aligned for
      // their types, so they are serialized with memcpy instead of being
      // stored through casted pointers. The byte layout is unchanged.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);

    // report the counted number of tables as the result, not the row count
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, (int32_t)num);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // never emit more rows than the result buffer holds or the limit allows
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

6981 6982 6983 6984 6985 6986 6987
/*
 * Return the response context currently stored in the query handle
 * (pQInfo->rspContext). The handle must not be NULL (asserted).
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* pRspContext = pInfo->rspContext;
  return pRspContext;
}

6988 6989 6990 6991 6992 6993 6994
/*
 * Release callback for a cached query handle (passed to taosCacheInit by
 * qOpenQueryMgmt).
 *
 * qhandle points to a slot that stores the query-info pointer. When both the
 * slot and the stored pointer are non-NULL, the query is killed first and then
 * destroyed; otherwise nothing happens.
 */
void freeqinfoFn(void *qhandle) {
  void** ppQInfo = qhandle;

  if (ppQInfo != NULL && *ppQInfo != NULL) {
    qKillQuery(*ppQInfo);
    qDestroyQueryInfo(*ppQInfo);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
6999
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7000 7001 7002 7003

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7004
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7005 7006 7007 7008
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7009

7010 7011 7012 7013 7014
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7015 7016

  qDebug("vgId:%d, open querymgmt success", vgId);
7017
  return pQueryMgmt;
7018 7019
}

H
Haojun Liao 已提交
7020
/*
 * Callback given to taosCacheRefresh() by qQueryMgmtNotifyClosed(): `handle`
 * points to a slot holding a query-info pointer, and that query is killed.
 */
static void queryMgmtKillQueryFn(void* handle) {
  qKillQuery(*(void**)handle);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7026 7027 7028 7029 7030 7031 7032
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7033
//  pthread_mutex_lock(&pQueryMgmt->lock);
7034
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7035
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7036

H
Haojun Liao 已提交
7037
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054
}

/*
 * Tear down a query manager: clean up its handle cache, destroy its mutex and
 * free the manager itself. A NULL manager is ignored. The manager must already
 * be marked closed (asserted).
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  // remember the id for the final log line; the manager is freed before it
  const int32_t vgId = pMgmt->vgId;

  assert(pMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  taosTFree(pMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

7060
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7061 7062 7063 7064
  if (pMgmt == NULL) {
    return NULL;
  }

7065
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7066

7067 7068
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7069
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7070 7071 7072
    return NULL;
  }

H
Haojun Liao 已提交
7073
//  pthread_mutex_lock(&pQueryMgmt->lock);
7074
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7075
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7076
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7077 7078
    return NULL;
  } else {
7079 7080 7081
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7082
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7083 7084 7085 7086 7087

    return handle;
  }
}

7088
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
7089 7090 7091 7092 7093 7094
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

7095
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
7096 7097 7098 7099 7100 7101 7102
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7103
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7104 7105 7106 7107 7108
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7109
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7110 7111 7112
  return 0;
}

7113