qExecutor.c 246.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise, the program
 * is forced to load the primary column explicitly.
 */
37
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
38 39
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

40
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
41
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
42
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
43
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
44

H
Haojun Liao 已提交
45
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
46

47
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
48
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
49

H
Haojun Liao 已提交
50 51
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey pair of one time window into another.
 * Both arguments are struct lvalues (not pointers) that expose skey and ekey.
 * The expansion intentionally carries no trailing semicolon, so the macro
 * behaves as a single statement and is safe inside an unbraced if/else; the
 * caller supplies the terminating ';'.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
/* Query execution status flags; each value occupies a distinct bit. */
enum {
  /* set when the query starts to execute */
  QUERY_NOT_COMPLETED = (1u << 0),

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists in group-by clause and diff/add/division/multiply/ query.
   */
  QUERY_RESBUF_FULL = (1u << 1),

  /*
   * The query is over:
   * 1. used in one-row result query processing, e.g., count/sum/first/last/avg...etc.
   * 2. also set when all data within the queried time window has been handled
   */
  QUERY_COMPLETED = (1u << 2),

  /*
   * Used when the result has not been completely returned to the client;
   * usually seen in interval queries with the interpolation option.
   */
  QUERY_OVER = (1u << 3),
};
77 78

/* Outcome codes of a timestamp/tag comparison used by ts join. */
enum {
  TS_JOIN_TS_EQUAL = 0,
  TS_JOIN_TS_NOT_EQUALS,   /* == 1 */
  TS_JOIN_TAG_NOT_EQUALS,  /* == 2 */
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Disabled fault-injection allocators. When enabled, malloc/calloc/realloc
 * are redirected to wrappers that randomly report failure:
 *   - u_malloc / u_calloc return NULL when rand() % 1000 is 0
 *   - u_realloc returns NULL when rand() % 5 is 0 or 1
 */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : malloc(__size);
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : calloc(num, __size);
}

static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();
  return (v % 5 <= 1) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Move a time window to the next one along the scan direction of the query.
 *
 * When the interval unit is neither 'n' nor 'y', the start key is shifted by
 * the sliding value multiplied by the direction factor, and the end key is
 * start + interval - 1.
 *
 * For 'n'/'y' units the window is shifted by whole calendar months through
 * localtime_r()/mktime(); a 'y' interval counts as 12 months per unit. The
 * resulting end key is one less than the start of the following window.
 *
 * @param pQuery  query descriptor (order, interval settings, precision)
 * @param tw      in/out: the current window on entry, the next window on return
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // scale the start key down to a time_t value (divided once more for microsecond precision)
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  // start of the next window, expressed in months since tm's epoch year
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen to 64 bits before scaling: time_t/long may be 32-bit on some platforms
  tw->skey = (int64_t)mktime(&tm) * 1000;

  // start of the window after that one; ekey is derived from it below
  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000;
    tw->ekey *= 1000;
  }
  tw->ekey -= 1;
}

#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))
H
Haojun Liao 已提交
174

175 176
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

H
Haojun Liao 已提交
181 182 183
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
static void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow);
184
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
187
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
188

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
197 198
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
199

200
/*
 * Evaluate all column filters of the query against one row.
 *
 * A row passes when, for every filtered column, at least one of the filters
 * attached to that column accepts the value found at position elemPos.
 *
 * @param pQuery   query descriptor holding the per-column filter info
 * @param elemPos  row position inside each filter column's data buffer
 * @return true if the row is accepted by every filtered column
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  int32_t colIdx = 0;

  while (colIdx < pQuery->numOfFilterCols) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[colIdx++];
    char *pValue = (char*)pColFilter->pData + pColFilter->info.bytes * elemPos;

    bool matched = false;
    for (int32_t f = 0; f < pColFilter->numOfFilters && !matched; ++f) {
      SColumnFilterElem *pOneFilter = &pColFilter->pFilters[f];

      if (isNull(pValue, pColFilter->info.type)) {
        // a NULL value can only be accepted by the "is null" filter
        if (pOneFilter->fp == isNull_filter) {
          matched = true;
        }
      } else if (pOneFilter->fp == notNull_filter) {
        matched = true;
      } else if (pOneFilter->fp != isNull_filter) {
        // regular comparison filter on a non-NULL value
        if (pOneFilter->fp(pOneFilter, pValue, pValue)) {
          matched = true;
        }
      }
    }

    if (!matched) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output columns of the query.
 *
 * When the query has a main output, the ts/tag/tagprj columns are ignored,
 * because they cannot decide the number of output rows by themselves.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);

  int64_t numOfRows = 0;
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;

    bool auxiliary = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);
    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pCell == NULL) {
      continue;
    }

    if (numOfRows < pCell->numOfRes) {
      numOfRows = pCell->numOfRes;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

268 269 270 271 272
/*
 * Overwrite the number of results of every output column with numOfRes; the
 * value has to be refreshed after the offset value was updated.
 * The ts/tag/tagprj/ts_dummy columns are left untouched.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SQLFunctionCtx     *pOneCtx = &pRuntimeEnv->pCtx[idx++];
    SResultRowCellInfo *pCell = GET_RES_INFO(pOneCtx);

    int16_t funcId = pOneCtx->functionId;
    bool    skipped = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ ||
                       funcId == TSDB_FUNC_TS_DUMMY);
    if (!skipped) {
      // the new value is expected to be strictly smaller than the current one
      assert(pCell->numOfRes > numOfRes);
      pCell->numOfRes = numOfRes;
    }
  }
}

H
Haojun Liao 已提交
288
/* Map a group index to its result id: 20000000 plus 10000 per group. */
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t firstId = 20000000;
  const int32_t idsPerGroup = 10000;

  return firstId + (groupIndex * idsPerGroup);
}

/*
 * Tell whether the group-by expression contains a normal (non-tag) column.
 * Returns false for a NULL expression or one without any group column.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  int32_t idx = 0;
  while (idx < pGroupbyExpr->numOfGroupCols) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx++);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pCol->colIndex > 0);
    }

    return true;
  }

  return false;
}

/*
 * Look up the data type of the first normal column in the group-by expression.
 *
 * The column id of that group-by column is matched against the query column
 * list; TSDB_DATA_TYPE_NULL is returned when no entry matches.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 stays in place when the group-by clause holds no normal column
  int32_t groupColId = -2;
  int32_t g = 0;
  while (g < pGroupbyExpr->numOfGroupCols) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
    g += 1;
  }

  int16_t colType = TSDB_DATA_TYPE_NULL;
  int32_t c = 0;
  while (c < pQuery->numOfCols) {
    if (pQuery->colList[c].colId == groupColId) {
      colType = pQuery->colList[c].type;
      break;
    }
    c += 1;
  }

  return colType;
}

/*
 * A query is a "selectivity with tags" query when it outputs at least one
 * tag_dummy/ts_dummy column together with at least one function whose status
 * carries TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCount = 0;
  bool    dummyTagSeen = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;

    if (funcId == TSDB_FUNC_TAG_DUMMY || funcId == TSDB_FUNC_TS_DUMMY) {
      dummyTagSeen = true;
    } else if ((aAggs[funcId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount += 1;
    }
  }

  return (selectivityCount > 0) && dummyTagSeen;
}

360 361 362 363 364 365 366 367 368 369 370
/* True when every output column is produced by the prj or tagprj function. */
bool isProjQuery(SQuery *pQuery) {
  bool onlyProjection = true;

  for (int32_t idx = 0; onlyProjection && idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    onlyProjection = (funcId == TSDB_FUNC_PRJ || funcId == TSDB_FUNC_TAGPRJ);
  }

  return onlyProjection;
}

371
/* True when the first output column is produced by the ts_comp function. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
372

373 374 375
/*
 * Apply the LIMIT clause to the rows of the current result batch.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows
 * is reduced to the remaining quota and the query is marked QUERY_COMPLETED.
 *
 * @return true if the batch was clipped, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit specified
  if (pQuery->limit.limit <= 0) {
    return false;
  }

  // still within the limit
  if (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any output column is produced by the top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    if (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      return true;
    }

    idx += 1;
  }

  return false;
}

H
Haojun Liao 已提交
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
/*
 * Tell whether the query needs tag values in its output: either it is a
 * single-column ts_comp query (which requires the tag value for the join
 * filter), or at least one output expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // look for a tag column, by which the results are aggregated
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[idx];
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      return true;
    }

    idx += 1;
  }

  return false;
}

423 424 425 426 427 428 429 430
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
431
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
432
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
433 434
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
435 436
  } else {
    *pColStatis = NULL;
437
  }
438

H
Haojun Liao 已提交
439
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
440 441 442
    return false;
  }

443 444 445
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
446

447 448 449
  return true;
}

H
Haojun Liao 已提交
450
/*
 * Locate the result row that belongs to the given key, creating it if needed.
 *
 * The lookup key is assembled from (pData, bytes, uid) into pRuntimeEnv->keyBuf
 * and searched in pRuntimeEnv->pWindowHashTable. On a hit the stored index
 * becomes the current index. On a miss a new result row is appended (master
 * scan only), growing the pResult pointer array when it is full.
 *
 * Failures are reported through longjmp(pRuntimeEnv->env, code):
 *   - TSDB_CODE_QRY_OUT_OF_MEMORY when the array or the row cannot be allocated
 *   - TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when size exceeds MAX_INTERVAL_TIME_WINDOW
 *
 * @return the result row at the current index, or NULL when the key is unknown
 *         and this is not the master scan
 */
static SResultRow *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);
  int32_t *p1 = (int32_t *) taosHashGet(pRuntimeEnv->pWindowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCapacity = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // the multiplicative growth truncates back to the old value for a capacity of 0 or 1;
      // always leave room for the slot at index 'size' that is written below
      if (newCapacity <= pWindowResInfo->size) {
        newCapacity = (int64_t)pWindowResInfo->size + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SResultRow **)t;

      // clear the newly added slots
      int32_t inc = (int32_t)newCapacity - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, POINTER_BYTES * inc);

      pWindowResInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow* pResult = getNewWindowResult(pRuntimeEnv->pool);
    pWindowResInfo->pResult[pWindowResInfo->size] = pResult;
    int32_t ret = createQueryResultInfo(pQuery, pResult);
    if (ret != TSDB_CODE_SUCCESS) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pRuntimeEnv->pWindowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

// get the correct time window according to the handled timestamp
/*
 * Get the time window that covers the handled timestamp.
 *
 * The candidate window is either rebuilt from prevSKey (when no window is
 * active yet, i.e. curIndex == -1) or copied from the currently active result
 * row. If ts falls outside the candidate, the window is re-aligned:
 *   - for 'n'/'y' interval units via taosTimeTruncate()/taosTimeAdd()
 *   - otherwise by moving the start key in whole sliding steps
 * Window end keys are inclusive (start of the next window minus one).
 *
 * @param pWindowResInfo  window result info (curIndex, prevSKey, result rows)
 * @param ts              timestamp that must be covered by the returned window
 * @param pQuery          query descriptor (interval settings, precision, range)
 * @return the time window; for ascending queries its ekey is capped by the
 *         query range, while skey is never clipped
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // the end key is inclusive, hence the -1 (same as in the re-alignment branch below)
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SResultRow* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // window starts after ts: step backwards by whole sliding units
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // window ends before ts: step forwards by whole sliding units
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

H
Haojun Liao 已提交
557
/*
 * Reserve one row in the disk-based result buffer for a result row that has
 * no page assigned yet.
 *
 * The last page of the given table id is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when the table has no page at all) a
 * new page is requested from the result buffer.
 *
 * @return 0 on success or when a page was already assigned, -1 when no page
 *         could be obtained
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  // already bound to a page, nothing to do
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  int32_t    newPageId = -1;
  tFilePage *pPage = NULL;

  SIDList pageList = getDataBufPagesIdList(pResultBuf, tid);
  if (taosArrayGetSize(pageList) != 0) {
    SPageInfo* pLast = getLastPageInfo(pageList);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    newPageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, tid, &newPageId);
      if (pPage != NULL) {
        assert(pPage->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, tid, &newPageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // bind the result row to the next free row of the page
  pWindowRes->pageId = newPageId;
  pWindowRes->rowId = (int32_t)(pPage->num++);
  assert(pWindowRes->pageId >= 0);

  return 0;
}

H
Haojun Liao 已提交
602
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, SDataBlockInfo* pBockInfo,
603
                                       STimeWindow *win, bool masterscan, bool* newWind) {
604 605
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
606

H
Haojun Liao 已提交
607
  SResultRow *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, pBockInfo->uid);
608
  if (pWindowRes == NULL) {
609 610 611
    *newWind = false;

    return masterscan? -1:0;
612
  }
613

614
  *newWind = true;
H
Haojun Liao 已提交
615

616
  // not assign result buffer yet, add new result buffer
617
  if (pWindowRes->pageId == -1) {
H
Haojun Liao 已提交
618
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, pBockInfo->tid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
619
    if (ret != TSDB_CODE_SUCCESS) {
620 621 622
      return -1;
    }
  }
623

624
  // set time window for current result
625
  pWindowRes->win = (*win);
626

H
Haojun Liao 已提交
627
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
628 629 630
  return TSDB_CODE_SUCCESS;
}

631
/* Return the 'closed' flag of the result row stored in the given slot. */
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SResultRow *pRow = pWindowResInfo->pResult[slot];
  return pRow->closed;
}

H
Haojun Liao 已提交
636
/*
 * Count the rows to step over, starting at pos, with respect to ekey.
 *
 * Ascending order: searchFn runs over the sub-array that begins at pos and the
 * returned index is the step count. Descending order: searchFn runs over the
 * first pos + 1 elements and the step count is the distance from pos to the
 * returned index. In both cases the located row is counted too when its
 * timestamp equals ekey.
 *
 * @return the number of rows; asserted to be greater than zero
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order != TSDB_ORDER_ASC) {
    int32_t found = searchFn((char *)pData, pos + 1, ekey, order);
    if (found >= 0) {
      steps = pos - found;
      steps += (pData[found] == ekey) ? 1 : 0;
    }
  } else {
    int32_t found = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (found >= 0) {
      steps = found;
      steps += (pData[found + pos] == ekey) ? 1 : 0;
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
667
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
668
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
669
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
670
    return pWindowResInfo->size;
671
  }
672

673
  // no qualified results exist, abort check
674
  int32_t numOfClosed = 0;
675

676
  if (pWindowResInfo->size == 0) {
677
    return pWindowResInfo->size;
678
  }
679

680
  // query completed
H
hjxilinx 已提交
681 682
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
683
    closeAllTimeWindow(pWindowResInfo);
684

685 686 687 688
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
689
    int64_t skey = TSKEY_INITIAL_VAL;
690

691
    for (i = 0; i < pWindowResInfo->size; ++i) {
H
Haojun Liao 已提交
692
      SResultRow *pResult = pWindowResInfo->pResult[i];
693
      if (pResult->closed) {
694
        numOfClosed += 1;
695 696
        continue;
      }
697

698
      TSKEY ekey = pResult->win.ekey;
699
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
700
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
701 702
        closeTimeWindow(pWindowResInfo, i);
      } else {
703
        skey = pResult->win.skey;
704 705 706
        break;
      }
    }
707

708
    // all windows are closed, set the last one to be the skey
709
    if (skey == TSKEY_INITIAL_VAL) {
710 711 712 713 714
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
715

716
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex]->win.skey;
717

718 719
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
720
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
721
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
722

723
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
724
    } else {
725
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
726
             numOfClosed);
727 728
    }
  }
729

730 731 732 733 734
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
735

736
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
737
  return numOfClosed;
738 739 740
}

/*
 * Count the rows of the data block, starting at startPos and following the
 * query order, that belong to the time window ending at ekey.
 *
 * When the window ends inside the block the count comes from
 * getForwardStepsInBlock(); otherwise all remaining rows of the block in scan
 * direction are taken. With updateLastKey set, pQuery->current->lastKey is
 * moved one direction step past the last counted row (or past the block
 * boundary when the whole remainder was taken).
 *
 * @return the number of rows; asserted to be greater than zero
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);
  bool    asc   = QUERY_IS_ASC_QUERY(pQuery);

  STableQueryInfo* pTableInfo = pQuery->current;

  // does the window end before the block does (in scan direction)?
  bool endsInBlock = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  int32_t rows = -1;
  if (endsInBlock) {
    rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
  } else if (asc) {
    rows = pDataBlockInfo->rows - startPos;
  } else {
    rows = startPos + 1;
  }

  if (updateLastKey) {  // update the last key
    if (endsInBlock) {
      int32_t lastRow = asc ? (startPos + (rows - 1)) : (startPos - (rows - 1));
      pTableInfo->lastKey = pPrimaryColumn[lastRow] + step;
    } else if (asc) {
      pTableInfo->lastKey = pDataBlockInfo->window.ekey + step;
    } else {
      pTableInfo->lastKey = pDataBlockInfo->window.skey + step;
    }
  }

  assert(rows > 0);
  return rows;
}
H
Haojun Liao 已提交
780 781
/*
 * Run the block-wise implementation (xFunction) of every output function over
 * forwardStep rows of the current data block.
 *
 * Functions are only applied on the master scan or when 'closed' is set.
 *
 * @param pWin         time window; its skey becomes nStartQueryTimestamp
 * @param offset       row position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsCol        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // remember the original flag, it is restored after each function call
  bool preAggWasSet = pCtx[0].preAggVals.isSet;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pOne = &pCtx[idx];

    pOne->nStartQueryTimestamp = pWin->skey;
    pOne->size = forwardStep;
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pOne->startOffset = offset;
    } else {
      pOne->startOffset = offset - (forwardStep - 1);
    }

    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    if ((aAggs[funcId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pOne->ptsList = &tsCol[pOne->startOffset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    // NOTE: the original value of isSet have been changed here
    if (pOne->preAggVals.isSet && forwardStep < numOfTotal) {
      pOne->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pOne, funcId)) {
      aAggs[funcId].xFunction(pOne);
    }

    // restore it
    pOne->preAggVals.isSet = preAggWasSet;
  }
}

814
/**
 * Feed a single row of the current block to every output function of the query.
 *
 * The row is ignored unless this is the master scan or the caller reports the window result as closed.
 *
 * @param pRuntimeEnv  runtime environment that owns the query and the function contexts
 * @param closed       status of the target window result, as reported by the caller
 * @param pWin         time window the row belongs to; its skey is stored as nStartQueryTimestamp
 * @param offset       position of the row, handed to the row-wise callback of each function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *ctxList = pRuntimeEnv->pCtx;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOne = &ctxList[k];
    pOne->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pSelectExpr[k].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pOne, fid)) {
      continue;
    }

    aAggs[fid].xFunctionF(pOne, offset);
  }
}

H
Haojun Liao 已提交
830 831
/**
 * Advance pNext to the following time window and locate the first row of the block for it.
 *
 * @param pRuntimeEnv     runtime environment that owns the query
 * @param pNext           in: current window; out: the next window, possibly moved further so that
 *                        it reaches the row found at the returned position
 * @param pDataBlockInfo  row count and time range of the current block
 * @param primaryKeys     timestamp column of the block
 * @param searchFn        search routine used to find a key in primaryKeys
 * @param prevPosition    last row position consumed by the previous window, or -1 if unknown
 * @return position of the first row for the next window, or -1 when the next window lies beyond
 *         the block in scan direction
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    ascending = QUERY_IS_ASC_QUERY(pQuery);

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  // next time window is not in current block
  bool beyondBlock = ascending ? (pNext->skey > pDataBlockInfo->window.ekey)
                               : (pNext->ekey < pDataBlockInfo->window.skey);
  if (beyondBlock) {
    return -1;
  }

  // the key to look for is the window boundary in scan direction, limited by the query start key
  TSKEY startKey = -1;
  if (ascending) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  int32_t startPos = 0;

  // tumbling time window query, a special case of sliding time window query:
  // the next window starts right after the last row of the previous one
  bool tumbling = (pQuery->interval.sliding == pQuery->interval.interval);
  if (tumbling && prevPosition != -1) {
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * The time window may not cover any data, which can happen when the time window is too small.
   * In that case move the window forward (in scan direction) until it reaches the row at startPos.
   */
  TSKEY next = primaryKeys[startPos];
  bool  emptyWindow = ascending ? (next > pNext->ekey) : (next < pNext->skey);
  if (!emptyWindow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    // 'n'/'y' units: the window is recomputed from the timestamp itself in both directions
    pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (ascending) {
    pNext->ekey += ((next - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    pNext->skey -= ((pNext->skey - next + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
891
/**
 * Return the end key of pWindow in scan direction, limited so that it never passes the
 * end key of the query range (pQuery->window.ekey).
 *
 * Ascending queries use pWindow->ekey as the end key, descending queries use pWindow->skey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY limit = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > limit) ? limit : pWindow->ekey;
  }

  return (pWindow->skey < limit) ? limit : pWindow->skey;
}

H
hjxilinx 已提交
908 909
/**
 * Find the data buffer of the column with the given id in a data block.
 *
 * @return pData of the first SColumnInfoData whose info.colId equals colId, or NULL if none matches
 *
 * todo: replace the linear scan with a binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
923
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
924 925 926
  if (pDataBlock == NULL) {
    return NULL;
  }
927

H
Haojun Liao 已提交
928
  char *dataBlock = NULL;
H
Haojun Liao 已提交
929
  SQuery *pQuery = pRuntimeEnv->pQuery;
930

931
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
932
  if (functionId == TSDB_FUNC_ARITHM) {
933
    sas->pArithExpr = &pQuery->pSelectExpr[col];
934

935 936 937 938
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
939

H
Haojun Liao 已提交
940 941 942 943
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

944
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
945
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
946
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
947
      SColumnInfo *pColMsg = &pQuery->colList[i];
948

949 950 951 952 953 954 955 956
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
957

958
      assert(dataBlock != NULL);
959
      sas->data[i] = dataBlock;  // start from the offset
960
    }
961

962
  } else {  // other type of query function
963
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
964
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
965 966 967 968 969
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
970 971
    } else {
      dataBlock = NULL;
972 973
    }
  }
974

975 976 977 978
  return dataBlock;
}

/**
H
Haojun Liao 已提交
979
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
980 981
 * @param pRuntimeEnv
 * @param forwardStep
982
 * @param tsCols
983 984 985 986 987
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
988
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
989 990
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
991
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
992 993
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

994 995
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
996
  if (pDataBlock != NULL) {
997
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
998
    tsCols = (TSKEY *)(pColInfo->pData);
999
  }
1000

1001
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1002 1003 1004
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1005

H
Haojun Liao 已提交
1006
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1007
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1008
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1009
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1010
  }
1011

1012
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1013
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1014
    TSKEY ts = TSKEY_INITIAL_VAL;
1015

H
Haojun Liao 已提交
1016 1017 1018 1019 1020 1021 1022 1023
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1024
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1025
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
1026
      taosTFree(sasArray);
H
hjxilinx 已提交
1027
      return;
1028
    }
1029

H
Haojun Liao 已提交
1030 1031 1032
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1033
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1034
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1035
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1036

1037
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1038
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1039
    }
1040

1041 1042
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1043

1044
    while (1) {
H
Haojun Liao 已提交
1045 1046
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1047 1048 1049
      if (startPos < 0) {
        break;
      }
1050

1051
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1052
      hasTimeWindow = false;
H
Haojun Liao 已提交
1053
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1054 1055
        break;
      }
1056

1057 1058 1059 1060 1061
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1062
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1063

1064 1065
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1066
    }
1067

1068 1069 1070 1071 1072 1073 1074
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1075
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1076
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1077 1078 1079 1080 1081
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1082

1083 1084 1085 1086
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1087

S
Shengliang Guan 已提交
1088
    taosTFree(sasArray[i].data);
1089
  }
1090

S
Shengliang Guan 已提交
1091
  taosTFree(sasArray);
1092 1093 1094 1095 1096 1097
}

/**
 * Select (and create if needed) the result row for the group that a group-by column value
 * belongs to, and make it the output target of all function contexts.
 *
 * @param pRuntimeEnv  runtime environment; its jump buffer is used for unsupported types and
 *                     for allocation failures
 * @param pData        pointer to the group-by column value of the current row
 * @param type         data type of the group-by column
 * @param bytes        width of the group-by column; used as key length for fixed-size types
 * @return TSDB_CODE_SUCCESS on success; -1 when the value is null, no result row could be
 *         obtained, or no result buffer page could be assigned to the row
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // not assign result buffer yet, add new result buffer
  char* d = pData;
  int16_t len = bytes;
  if (isVarType) {
    // variable-length values are keyed by their payload, without the length header
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  uint64_t uid = 0; // uid is always set to be 0.
  SResultRow *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, uid);
  if (pWindowRes == NULL) {
    return -1;
  }

  if (isVarType) {
    /*
     * This function runs for every row, so the same result row is seen many times. Resize the
     * key buffer the row already owns instead of allocating a new one and dropping the old one.
     * NOTE(review): assumes key is NULL in a freshly created result row - confirm against the
     * result row allocator.
     */
    void *key = realloc(pWindowRes->key, varDataTLen(pData));
    if (key == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pWindowRes->key = key;
    varDataCopy(pWindowRes->key, pData);
  } else {
    // fixed-size integer types: the value itself is stored as both ends of the row window
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
      default: break;  // any other type keeps -1
    }

    pWindowRes->win.skey = v;
    pWindowRes->win.ekey = v;
  }

  if (pWindowRes->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1151
/**
 * Find the data buffer of the (non-tag) group-by column in the current data block.
 *
 * @param pQuery      query whose group-by expression and column list are inspected
 * @param type        out: data type of the group-by column, taken from pQuery->colList
 * @param bytes       out: width of the group-by column, taken from pQuery->colList
 * @param pDataBlock  column data of the current block
 * @return data buffer of the first non-tag group-by column that is present in the block, or NULL;
 *         type and bytes are written for every non-tag column that was examined
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      continue;
    }

    // position of the group-by column in the column list of the query
    int32_t colId = pColIndex->colId;
    int16_t colIndex = -1;

    int32_t c = 0;
    while (c < pQuery->numOfCols) {
      if (pQuery->colList[c].colId == colId) {
        colIndex = c;
        break;
      }

      c += 1;
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     * The colIndex is acquired from the first table of all qualified tables in this vnode during
     * the query prepare stage; the remaining tables may not have the required column in cache.
     * Hence the block is searched by column id instead of being indexed directly.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColIndex->colId == pColData->info.colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/**
 * Compare the row at the given position with the current element of the timestamp buffer.
 *
 * @param pRuntimeEnv  runtime environment that owns the timestamp buffer and the function contexts
 * @param offset       position of the row in the input buffer of the first function context
 * @return TS_JOIN_TAG_NOT_EQUALS when the tag of the first context differs from the tag of the
 *         current element, TS_JOIN_TS_NOT_EQUALS when the row has not reached the element
 *         timestamp yet (in scan direction), TS_JOIN_TS_EQUAL otherwise
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is handed to tVariantCompare as a pointer above, so it is dereferenced with '->'
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag->i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  // a row that is already past the element timestamp in scan direction is not expected
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether a function has to be invoked for the current input.
 *
 * The checks are applied in this order:
 *   1. TSDB_FUNC_TS always runs.
 *   2. A completed result, TSDB_FUNC_TAG_DUMMY and TSDB_FUNC_TS_DUMMY never run.
 *   3. first/first_dst run only in ascending queries.
 *   4. last/last_dst run only when param[0] of the context equals the query order.
 *   5. Everything else runs unless the current scan is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (pResInfo->complete || isDummy) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, none of the remaining functions is executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1258 1259
/**
 * Apply all output functions of the query to the current data block, one row at a time.
 *
 * Each row is first checked against the timestamp buffer (if any) and the column filters (if any).
 * A surviving row is then handed either to every time window that contains it (interval query)
 * or to the group selected by its group-by column value (if grouping by a normal column), and the
 * row-wise callback of each function is invoked. Finally the last key of the current table and
 * the timestamp buffer cursor are recorded.
 *
 * @param pRuntimeEnv     runtime environment that owns the query and the function contexts
 * @param pStatis         block statistics, forwarded to setExecParams
 * @param pDataBlockInfo  row count and time range of the current block
 * @param pWindowResInfo  time window results of the query
 * @param pDataBlock      column data of the current block
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *item = pQuery->current;

  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
  bool groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

  // the first column serves as timestamp column only when it really has the timestamp type
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupbyColumnValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
  }

  // set the input column data of every filter
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  // offset keeps the position of the last visited row; it is needed after the loop
  int32_t offset = -1;

  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow);

      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // the row may belong to further (overlapping) windows; the current index is restored afterwards
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool asc = QUERY_IS_ASC_QUERY(pQuery);
        if ((asc && nextWin.skey > pQuery->window.ekey) || (!asc && nextWin.skey < pQuery->window.ekey)) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (!hasTimeWindow) {
          continue;
        }

        closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyColumnValue) {
        char *val = groupbyColumnData + bytes * offset;

        if (setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes) != TSDB_CODE_SUCCESS) {
          continue;  // null data, too many state code
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);

  // record where the scan of this table continues
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    TSKEY blockEnd = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    item->lastKey = blockEnd + step;
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    item->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array (allocated by getDataBlock)
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      taosTFree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1424
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1425
  SQuery *pQuery = pRuntimeEnv->pQuery;
1426

H
hjxilinx 已提交
1427 1428
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1429

H
Haojun Liao 已提交
1430
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1431
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1432
  } else {
1433
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1434
  }
1435

1436
  // update the lastkey of current table
1437
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1438
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1439

1440
  // interval query with limit applied
1441
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1442
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1443 1444
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1445
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1446

1447 1448 1449 1450
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1451

1452 1453 1454
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1455

1456 1457 1458
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1459 1460 1461 1462 1463

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1464
    }
1465
  }
1466

1467
  return numOfRes;
1468 1469
}

H
Haojun Liao 已提交
1470
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1471
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1472

1473 1474
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1475

1476
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1477
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1478
  pCtx->aInputElemBuf = inputData;
1479

1480
  if (tpField != NULL) {
H
Haojun Liao 已提交
1481
    pCtx->preAggVals.isSet  = true;
1482 1483
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1484 1485 1486
  } else {
    pCtx->preAggVals.isSet = false;
  }
1487

H
Haojun Liao 已提交
1488 1489
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1490 1491
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1492

H
Haojun Liao 已提交
1493
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1494 1495
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1496

1497 1498
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1499
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1500
  }
1501

1502 1503 1504 1505 1506
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1507
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1508
    /*
H
Haojun Liao 已提交
1509
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1510 1511 1512 1513 1514 1515
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
H
Haojun Liao 已提交
1516 1517
      SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);
      STwaInfo *pTWAInfo = (STwaInfo*) GET_ROWCELL_INTERBUF(pInfo);
1518 1519 1520
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1521

1522 1523
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1524 1525 1526 1527 1528 1529
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1530
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1531 1532 1533
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1534
    pInterpInfo->type = (int8_t)pQuery->fillType;
1535 1536
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1537

1538 1539 1540 1541
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1542 1543 1544
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1545 1546
      }
    }
H
Haojun Liao 已提交
1547 1548 1549
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1550
  }
1551

1552 1553 1554 1555 1556 1557
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1558
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1559 1560 1561
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1562
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1563 1564 1565 1566 1567 1568
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1569
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1570 1571
  SQuery* pQuery = pRuntimeEnv->pQuery;

1572
  if (isSelectivityWithTagsQuery(pQuery)) {
1573
    int32_t num = 0;
1574
    int16_t tagLen = 0;
1575

1576
    SQLFunctionCtx *p = NULL;
1577
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1578 1579 1580
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1581

1582
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1583
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1584

1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1598 1599 1600 1601 1602
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1603
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1604
    }
1605
  }
H
Haojun Liao 已提交
1606 1607

  return TSDB_CODE_SUCCESS;
1608 1609
}

H
Haojun Liao 已提交
1610 1611 1612 1613 1614 1615 1616 1617 1618 1619
/*
 * Intentionally a no-op: the former implementation, which walked the output
 * columns and bound an intermediate buffer to each result cell, is disabled.
 * The function is kept so that its signature remains available.
 */
static FORCE_INLINE void setResultRowCellInfo(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pRow, char* buf) {
  (void)pRuntimeEnv;
  (void)pRow;
  (void)buf;
}

1622
/*
 * Allocate and initialise the per-query runtime environment: the result-info
 * buffer, one SQLFunctionCtx per output column and the per-column row-cell
 * offsets.
 *
 * pRuntimeEnv  runtime environment embedded in the owning SQInfo
 * order        value stored into param[2] of top/bottom/diff function contexts
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 * (here or in setCtxTagColumnInfo) fails. On failure everything allocated by
 * this function is released again.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultRowCellInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL) {
    goto _clean;
  }

  // offset[0] is set explicitly; rowCellInfoOffset[0] stays 0 from calloc
  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // remember the "require null" request in the context and strip the flag bit
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type/width from the tag list, the argument, or the column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;
    pCtx->stableQuery = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pSelectExpr[i].interBytes;

    // copy the function arguments into variants owned by the context
    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // running offsets: output bytes per column, and cell header + intermediate bytes per column
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pSelectExpr[i - 1].interBytes;
    }
  }

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameter variants created above before dropping the context array.
  // When the array was never filled (allocation failure) it is zeroed by calloc,
  // so numOfParams is 0 and the inner loop does nothing.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);
  taosTFree(pRuntimeEnv->rowCellInfoOffset);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by the runtime environment: function contexts and
 * their parameter/tag variants, the result-info buffer, fill info, result
 * buffer, both query handles, the timestamp buffer, the key buffer, the
 * window hash table, the window result pool and the row-cell offset array.
 *
 * Does nothing when no query is attached (pRuntimeEnv->pQuery == NULL).
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  if (pRuntimeEnv->pQuery == NULL) {
    return;
  }

  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);

  qDebug("QInfo:%p teardown runtime env", pQInfo);
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }

      tVariantDestroy(&pCtx->tag);
      taosTFree(pCtx->tagInfo.pTagCtxList);
    }

    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
  taosTFree(pRuntimeEnv->keyBuf);

  taosHashCleanup(pRuntimeEnv->pWindowHashTable);
  pRuntimeEnv->pWindowHashTable = NULL;

  pRuntimeEnv->pool = destroyWindowResultPool(pRuntimeEnv->pool);

  // allocated in setupQueryRuntimeEnv next to resultInfo/pCtx; released last and
  // outside the pCtx check so it is freed even if pCtx is already gone
  taosTFree(pRuntimeEnv->rowCellInfoOffset);
}

H
Haojun Liao 已提交
1780
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1781

H
Haojun Liao 已提交
1782
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;}
H
hjxilinx 已提交
1783

H
Haojun Liao 已提交
1784 1785 1786
/*
 * Decide whether the query is treated as a "fixed output" query.
 *
 * Interval queries never are; top/bottom and group-by-normal-column queries
 * always are. Otherwise the answer is true as soon as one output function,
 * other than the timestamp helpers, is not flagged as multi-output.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[k].base;

    // ignore the ts_comp function: a leading single-parameter projection of the primary timestamp
    bool leadingTsProjection = (k == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool timestampHelper = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || timestampHelper) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1816
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1817
static bool isPointInterpoQuery(SQuery *pQuery) {
1818
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1819
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1820
    if (functionID == TSDB_FUNC_INTERP) {
1821 1822 1823
      return true;
    }
  }
1824

1825 1826 1827 1828
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1829
static bool isSumAvgRateQuery(SQuery *pQuery) {
1830
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1831
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1832 1833 1834
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1835

1836 1837 1838 1839 1840
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1841

1842 1843 1844
  return false;
}

H
hjxilinx 已提交
1845
// True when at least one output column is computed by TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool found = false;

  for (int32_t k = 0; k < pQuery->numOfOutput && !found; ++k) {
    found = (pQuery->pSelectExpr[k].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return found;
}

H
hjxilinx 已提交
1856
/*
 * Report whether some output function needs a scan order different from the
 * query order: a first()/first_dst() in a non-ascending query, or a
 * last()/last_dst() whose order argument differs from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[k].base;
    int32_t      fid = pFunc->functionId;

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the first argument carries the scan order needed to acquire the last result of the column
      int32_t requiredOrder = (int32_t)pFunc->arg[0].argValue.i64;
      if (requiredOrder != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
1878

H
Haojun Liao 已提交
1879 1880 1881 1882
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1883 1884
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1885 1886 1887
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1888 1889 1890 1891

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1892
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1893 1894 1895
      return false;
    }
  }
1896

H
hjxilinx 已提交
1897 1898 1899
  return true;
}

1900 1901
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1902
/*
 * Compute the interval-aligned time window that contains `key`.
 *
 * win->skey is `key` truncated by taosTimeTruncate(); win->ekey is the last
 * timestamp of that window. `key` must lie in [keyFirst, keyLast] and the
 * sliding step must not exceed the interval.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * If keyFirst > INT64_MAX - interval, the whole query range is shorter than
   * one interval, so the window end is simply clamped to INT64_MAX.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  // 'n' and 'y' interval units go through taosTimeAdd instead of a fixed-width addition
  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * It is 0 for top/bottom and group-by-normal-column queries. Otherwise it is
 * 1 only if at least one function other than the timestamp helpers exists and
 * all of them are multi-output.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[k].base;

    bool timestampHelper = (pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (timestampHelper) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!allMultiOutput) {
      break;  // one single-output function settles the answer
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1946
bool colIdCheck(SQuery *pQuery) {
1947 1948
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1949
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1950
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1951 1952 1953
      return false;
    }
  }
1954

1955 1956 1957 1958 1959 1960
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
/*
 * True when every output function, apart from the ts/tag helper functions, is
 * either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pSelectExpr[k].base.functionId;

    bool isTsHelper = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY);
    bool isTagHelper = (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAG_DUMMY);
    if (isTsHelper || isTagHelper) {
      continue;
    }

    bool matches = (fid == functId) || (fid == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

// True when the only non-helper functions in the query are first()/first_dst().
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// True when the only non-helper functions in the query are last()/last_dst().
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1981
// todo refactor, add iterator
1982 1983
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1984
  for(int32_t i = 0; i < t; ++i) {
1985
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1986 1987 1988

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1989
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1990

1991 1992 1993 1994
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1995 1996 1997 1998
    }
  }
}

1999
/*
 * Force the scan order that the query's functions require, exchanging the
 * bounds of the query window (and the per-table last keys) when the order
 * is flipped.
 *
 *  - last_row query:                 ascending
 *  - group by normal column, desc:   ascending
 *  - point interpolation, no interval: ascending
 *  - only first()/first_dst():       ascending
 *  - only last()/last_dst():         descending
 * The last two rules apply to non-interval queries, and to interval queries
 * only when stableQuery is set.
 *
 * pQueryMsg is not used by this function.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // shared log format for every "order changed and query range exchanged" case below
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Initial number of result pages for the query:
 *   group by normal column -> 128
 *   interval query         -> number of tables, but at least 16
 *   anything else          -> 1
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  int32_t pages = 0;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, minPagesForInterval));
  }

  assert(pages > 0);
  return pages;
}

2109 2110
/*
 * Compute the row size and page size of the intermediate result buffer.
 *
 * *rowsize receives the query row size scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT.
 * *ps starts at DEFAULT_INTERN_BUF_PAGE_SIZE and is doubled until a page,
 * minus the tFilePage header, can hold at least four rows.
 * pRuntimeEnv->numOfRowsPerPage is set to the resulting rows-per-page count.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t minRowsPerPage = 4;
  int32_t headerBytes = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // grow the page by powers of two until the minimum number of rows fits
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * minRowsPerPage) > (*ps) - headerBytes; *ps = (*ps << 1u)) {
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2126
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2127

H
Haojun Liao 已提交
2128 2129 2130 2131
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
2132 2133 2134 2135 2136
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
2137

H
Haojun Liao 已提交
2138 2139 2140 2141 2142 2143 2144 2145
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

2146
    // no statistics data, load the true data block
H
Haojun Liao 已提交
2147
    if (index == -1) {
H
Haojun Liao 已提交
2148
      return true;
2149
    }
2150

2151
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
2152
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
H
Haojun Liao 已提交
2153
      return true;
2154
    }
2155

2156
    // all data in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
2157
    if (pDataStatis[index].numOfNull == numOfRows) {
2158 2159 2160 2161 2162 2163 2164 2165 2166

      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
        if (pFilterElem->fp == isNull_filter) {
          return true;
        }
      }

2167 2168
      continue;
    }
2169

H
Haojun Liao 已提交
2170 2171 2172
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
S
TD-1057  
Shengliang Guan 已提交
2173 2174
      float minval = (float)(*(double *)(&pDataBlockst->min));
      float maxval = (float)(*(double *)(&pDataBlockst->max));
2175

2176 2177 2178 2179 2180 2181 2182
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
2183
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
2184 2185 2186 2187 2188
          return true;
        }
      }
    }
  }
2189

H
Haojun Liao 已提交
2190 2191 2192 2193 2194 2195 2196 2197
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
2198

H
Haojun Liao 已提交
2199
  return false;
2200 2201
}

H
Haojun Liao 已提交
2202 2203 2204 2205 2206 2207 2208 2209
/*
 * Check whether a time-window boundary falls inside the data block.
 *
 * Starting from the aligned window that contains the block's first key
 * (ascending scan) or last key (descending scan), windows are stepped with
 * GET_NEXT_TIMEWINDOW until they leave the block. Returns true as soon as a
 * window boundary is found inside the block, false otherwise.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  TSKEY blockStart = pBlockInfo->window.skey;
  TSKEY blockEnd = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockStart, sk, ek, &w);
    assert(w.ekey >= blockStart);

    // the first window ends before the block does
    if (w.ekey < blockEnd) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.skey > blockEnd) {
        break;
      }

      assert(w.ekey > blockEnd);
      if (w.skey <= blockEnd && w.skey > blockStart) {
        return true;
      }
    }
  } else {
    getAlignQueryTimeWindow(pQuery, blockEnd, sk, ek, &w);
    assert(w.skey <= blockEnd);

    // the first window starts after the block does
    if (w.skey > blockStart) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.ekey < blockStart) {
        break;
      }

      assert(w.skey < blockStart);
      if (w.ekey < blockEnd && w.ekey >= blockStart) {
        return true;
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2251
/*
 * Decide how much of the current data block is required and load it.
 *
 * *status is set to one of:
 *   BLK_DATA_NO_NEEDED      nothing is loaded, the block is counted as discarded
 *   BLK_DATA_STATIS_NEEDED  block statistics are loaded; the data itself only if
 *                           no statistics exist
 *   BLK_DATA_ALL_NEEDED     statistics and data are loaded
 *   BLK_DATA_DISCARD        the filters rejected the block based on its statistics
 *
 * *pStatis / *pDataBlock receive the loaded statistics / column data.
 * Returns TSDB_CODE_SUCCESS, or terrno when the data block cannot be retrieved.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every output function; stop once all data is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // report a failed load the same way the all-needed path below does
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    // NOTE(review): the block is still retrieved and counted as loaded when it was just
    // marked BLK_DATA_DISCARD - confirm whether callers rely on *pDataBlock in that case.
    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2336
/*
 * Binary search in an ascending array of `num` timestamps stored at pValue.
 *
 * order == TSDB_ORDER_DESC: returns the index of the last element that is
 *   <= key, or -1 when every element is greater than key.
 * order == TSDB_ORDER_ASC:  returns the index of the first element that is
 *   >= key, or -1 when every element is smaller than key.
 * Returns -1 for num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * ts = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= ts[hi]) return hi;
      if (key == ts[lo]) return lo;
      if (key < ts[lo]) return lo - 1;

      mid = lo + ((hi - lo + 1) >> 1);

      if (key < ts[mid]) {
        hi = mid - 1;
      } else if (key > ts[mid]) {
        lo = mid + 1;
      } else {
        return mid;  // exact match
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= ts[lo]) return lo;
    if (key == ts[hi]) return hi;

    if (key > ts[hi]) {
      // the answer is the element right after the current range, if there is one
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    mid = lo + ((hi - lo + 1) >> 1);

    if (key < ts[mid]) {
      hi = mid - 1;
    } else if (key > ts[mid]) {
      lo = mid + 1;
    } else {
      break;  // exact match
    }
  }

  return mid;
}

2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411
/**
 * Resize every output column buffer of the query so that it holds `capacity` rows.
 *
 * Nothing happens when `capacity` is smaller than the current capacity. On
 * allocation failure the function does not return: it jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * @param pRuntimeEnv runtime environment owning the query and function contexts
 * @param capacity    requested number of rows per output buffer
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // widen before multiplying: bytes * capacity may not fit in a 32-bit signed integer
    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // the buffer may have moved, re-point the function context at its data area
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2427 2428 2429
/**
 * Grow the output buffers so that all rows of the given data block fit behind
 * the rows already produced.
 *
 * Only applies to queries that are not interval queries, not group-by-normal-column
 * queries, not fixed-output queries and not ts-comp queries (e.g. prj/diff queries).
 * The newly usable area of every buffer is zero-filled and each function context is
 * re-pointed behind the rows already present. On allocation failure the function
 * jumps to pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * @param pRuntimeEnv runtime environment owning the query and function contexts
 * @param pBlockInfo  description of the data block about to be processed
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough free rows left
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // widen before multiplying: bytes * newSize may not fit in a 32-bit signed integer
    size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that already hold results
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // top/bottom/diff take their timestamp output position from the first column's output buffer
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486
/**
 * Initialize the start time and previous start key of the window result info
 * from the first data block of an interval query.
 *
 * Does nothing unless the query is an interval query whose prevSKey still holds
 * TSKEY_INITIAL_VAL.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->windowResInfo.prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;
  STimeWindow     win = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    // align on the first timestamp of the block
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
    pInfo->startTime = win.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);
    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = win.skey;
}

2487 2488
/**
 * Iterate over the data blocks of the active query handle and apply the query
 * functions to each of them.
 *
 * The loop stops when the handle is exhausted, a block fails to load, or the
 * query status reports a full result buffer / completion. A killed query or a
 * non-success terrno aborts through longjmp on pRuntimeEnv->env.
 *
 * @return always 0
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo  *summary = &pRuntimeEnv->summary;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  // the master scan reads from the primary handle, any other scan from the secondary one
  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    uint32_t     status = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    if (loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQueryHandle, &blockInfo, &pStatis,
                              &pDataBlock, &status) != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // block is skipped: advance lastKey just past the block in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    summary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

      closeAllTimeWindow(pWindowResInfo);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;  // point to the last time window
    } else {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2567
/**
 * Replace the content of `tag` with the value of one tag column of a table.
 *
 * The previous content of `tag` is destroyed first. For TSDB_TBNAME_COLUMN_INDEX
 * the table name is stored as a binary value. Otherwise the tag value is fetched
 * from the table; a missing or NULL value sets tag->nType to TSDB_DATA_TYPE_NULL.
 *
 * @param tsdb     not used by this function
 * @param pTable   table whose tag (or name) is read
 * @param tagColId column id of the tag, or TSDB_TBNAME_COLUMN_INDEX
 * @param tag      variant receiving the value
 * @param type     data type of the tag column
 * @param bytes    width of the tag column in bytes
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char* name = tsdbGetTableName(pTable);
    assert(name != NULL);

    tVariantCreateFromBinary(tag, varDataVal(name), varDataLen(name), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
  if (val == NULL || isNull(val, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  bool isVarData = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);
  if (isVarData) {
    // variable-length value: payload and length come from the var-data header
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
  } else {
    tVariantCreateFromBinary(tag, val, bytes, type);
  }
}

2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611
/**
 * Linear lookup of a tag column description by column id.
 *
 * @param pTagColList array of tag column descriptions (must not be NULL)
 * @param numOfTags   number of entries in pTagColList (must be positive)
 * @param colId       column id to look for
 * @return pointer to the first matching entry, or NULL if there is none
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t idx = 0;
  while (idx < numOfTags) {
    SColumnInfo* pCol = &pTagColList[idx++];
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2612
/**
 * Load the tag values of `pTable` into the function contexts of the query.
 *
 * - A query whose single output is TSDB_FUNC_TS_COMP only needs the tag column
 *   named by the expression's first argument, stored in pCtx[0].tag.
 * - Otherwise every output expression that refers to a tag column gets its tag
 *   value set; in addition, when a ts buffer exists and the first expression is
 *   a ts/prj function on the primary timestamp column, the join tag named by
 *   its first argument is stored in pCtx[0].tag.
 *
 * NOTE(review): the result of doGetTagColumnInfoById() is dereferenced without a
 * NULL check, exactly as before; presumably the tag column always exists.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQInfo         *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  SExprInfo      *pExprInfo = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t      tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
  if (pFuncMsg->functionId != TSDB_FUNC_TS && pFuncMsg->functionId != TSDB_FUNC_PRJ) {
    return;
  }

  if (pRuntimeEnv->pTSBuf == NULL || pFuncMsg->colInfo.colIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
  SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

  doSetTagValueInParam(tsdb, pTable, tagColId, &pCtx[0].tag, pColInfo->type, pColInfo->bytes);

  int16_t tagType = pCtx[0].tag.nType;
  if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pExprInfo->base.arg->argValue.i64, pCtx[0].tag.pz);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pExprInfo->base.arg->argValue.i64, pCtx[0].tag.i64Key);
  }
}

H
Haojun Liao 已提交
2662
/**
 * Feed one window result row into the merge functions of all output columns.
 *
 * @param pRuntimeEnv runtime environment providing the function contexts
 * @param timestamp   stored as nStartQueryTimestamp of every context
 * @param pWindowRes  result row used as merge input
 * @param mergeFlag   true: merge into the current output row;
 *                    false: advance every output buffer by one row and
 *                    re-initialize the functions before merging
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SResultRow *pWindowRes, bool mergeFlag) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  // first pass: prepare the input (and, for a new output row, the output) of every context
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCurCtx = &pRuntimeEnv->pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      pCurCtx->aOutputBuf += pCurCtx->outputBytes;
      pCurCtx->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pCurCtx->resultInfo);
      aAggs[functionId].init(pCurCtx);
    }

    pCurCtx->hasNull = true;
    pCurCtx->nStartQueryTimestamp = timestamp;
    pCurCtx->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);

    // in case of tag column, the tag information should be extracted from input buffer
    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    tVariantDestroy(&pCurCtx->tag);

    int32_t type = pCurCtx->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCurCtx->tag, varDataVal(pCurCtx->aInputElemBuf), varDataLen(pCurCtx->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pCurCtx->tag, pCurCtx->aInputElemBuf, pCurCtx->inputBytes, pCurCtx->inputType);
    }
  }

  // second pass: run the merge function of every column except dummy tags
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pRuntimeEnv->pCtx[i]);
    }
  }
}

2706
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2775
/**
 * Debug helper: dump the intermediate results held in column pages to stdout,
 * one line per row with tab-separated cells.
 *
 * Column `i` is read from pdata[i]->data, where row `j` starts at
 * `bytes(i) * j`. Only binary, timestamp, bigint, int, float and double
 * output columns are printed; columns of other types are skipped.
 *
 * @param pdata       one page per output column
 * @param pRuntimeEnv runtime environment providing the query description
 * @param numOfRows   number of rows to print
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      int32_t    type = pExpr->type;
      char      *pCell = pdata[col]->data + pExpr->bytes * row;

      if (type == TSDB_DATA_TYPE_BINARY) {
        printBinaryData(pExpr->base.functionId, pCell, type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)pCell);
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)pCell);
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)pCell);
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)pCell);
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2810 2811 2812
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2813 2814 2815 2816 2817
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2818

2819 2820
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2821

2822 2823
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2824

2825 2826 2827 2828
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2829

2830 2831 2832 2833
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2834

H
hjxilinx 已提交
2835
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
H
Haojun Liao 已提交
2836
  SResultRow * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2837
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId);
2838

H
Haojun Liao 已提交
2839
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2840
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2841

H
hjxilinx 已提交
2842
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
H
Haojun Liao 已提交
2843
  SResultRow * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2844
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId);
2845

H
Haojun Liao 已提交
2846
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2847
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2848

2849 2850 2851
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2852

2853 2854 2855
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2856
/**
 * Merge the results of the table groups, starting at pQInfo->groupIndex, until
 * one group produces at least one result page or all groups are consumed.
 *
 * groupIndex is advanced past every processed group. When the last group has
 * been processed and all of its pages were consumed, the query is flagged as
 * over. The elapsed time is added to summary.firstStageMergeTime.
 *
 * @return TSDB_CODE_SUCCESS, or -1 if merging a group failed
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t code = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (code > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/**
 * Copy merged group results from the disk-based result buffer into the output
 * buffers of the query (pQuery->sdata), at most pQuery->rec.capacity rows.
 *
 * When all pages of the current group have been handed out, the next group is
 * merged first; if no group is left the query is flagged as over and nothing is
 * copied. The read position (pageId/rowId in groupResInfo) is advanced so that
 * the next call continues where this one stopped, including in the middle of a
 * page. pQuery->rec.rows must be 0 on entry and receives the number of copied rows.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->rowId < pData->num);

    // first row of this page that has not been handed out yet
    int32_t startRow = (int32_t)pGroupResInfo->rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer gets full: take what fits and remember where to resume
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->rowId += numOfCopiedRows;
      done = true;
    } else {
      // only the rows not yet handed out remain in this page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pageId += 1;
      pGroupResInfo->rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page; skip the rows already consumed
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + (size_t)startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2964
/**
 * Number of results held by a result row.
 *
 * The count is taken from the first output column that reports a positive
 * numOfRes. ts, tag and tagprj columns are skipped because they can not decide
 * the output number of the query; it is decided by the main output.
 *
 * @return the number of results, or 0 if no column reports any
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SResultRow *pResultRow) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool isAuxColumn =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (isAuxColumn) {
      continue;
    }

    SResultRowCellInfo *pCell = &pResultRow->pCellInfo[col];
    assert(pCell != NULL);

    if (pCell->numOfRes > 0) {
      return pCell->numOfRes;
    }
  }

  return 0;
}

2987
/**
 * Merge the window results of all tables of one group into group result pages.
 *
 * Tables without result pages or without window results are ignored.
 * - No table with data: nothing is done.
 * - Exactly one table with data: its own pages are used as the group result.
 * - Otherwise the tables are merged by timestamp with a loser tree; rows with
 *   equal timestamps are merged into one output row and full output buffers
 *   are flushed into the result buffer.
 *
 * Memory exhaustion and a killed query abort through longjmp on
 * pRuntimeEnv->env after the temporary merge structures were released.
 *
 * @param pQInfo query the group belongs to
 * @param pGroup array of STableQueryInfo pointers forming the group
 * @return number of data pages of the group result (0 if the group has no
 *         data), or -1 if flushing the merged data failed
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    taosTFree(posList);
    taosTFree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that actually hold results
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    taosTFree(posList);
    taosTFree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {
    // the tree is dereferenced below; without it the merge can not proceed
    taosTFree(pTableList);
    taosTFree(posList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // NOTE(review): pRow is never released in this function. It is handed to
  // resetMergeResultBuf() together with the function contexts, which presumably
  // keep referring to it after this function returns -- confirm before freeing it here.
  SResultRow* pRow = calloc(1, getWindowResultSize(pRuntimeEnv));
  if (pRow == NULL) {
    // release the temporary merge structures before leaving through longjmp
    taosTFree(pTree);
    taosTFree(pTableList);
    taosTFree(posList);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // the cell info array lives directly behind the row header
  pRow->pCellInfo = (SResultRowCellInfo*) ((char*) pRow + sizeof(SResultRow));

  setResultRowCellInfo(pRuntimeEnv, pRow, NULL);
  resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pTree);

      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the tree root names the source whose current row comes next
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SResultRow  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window result: skip it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            // release the temporary merge structures, as the final flush failure path does
            taosTFree(pTree);
            taosTFree(pTableList);
            taosTFree(posList);

            return -1;
          }

          resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SResultRow  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      taosTFree(pTree);
      taosTFree(pTableList);
      taosTFree(posList);

      return -1;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);

  return pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3172 3173
/*
 * Copy the rows currently held in pQuery->sdata[] into newly obtained pages of the
 * disk-based result buffer, at most numOfRowsPerPage rows per page, and count every
 * page that was filled in pGroupResInfo->numOfDataPages.
 *
 * @param pRuntimeEnv   runtime environment (query, result buffer, per-column page offsets)
 * @param pGroupResInfo group result descriptor; its groupId is handed to getNewDataBuf
 * @return TSDB_CODE_SUCCESS; this function produces no other value
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery  = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId      = -1;  // passed by address to getNewDataBuf
  int32_t rowsPerPage = pResBuf->numOfRowsPerPage;
  int32_t pending     = (int32_t) pQuery->sdata[0]->num;  // rows not yet copied
  int32_t done        = 0;                                // rows already copied

  while (pending > 0) {
    int32_t rows = pending;
    if (rows > rowsPerPage) {
      rows = rowsPerPage;
    }
    assert(rows > 0);

    // NOTE(review): the returned page is used without a NULL check -- confirm getNewDataBuf cannot fail
    tFilePage *pPage = getNewDataBuf(pResBuf, pGroupResInfo->groupId, &pageId);
    pPage->num = rows;

    // column by column: each column occupies its own region inside the page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = ((char *) pQuery->sdata[col]->data) + done * width;
      char *dst = pPage->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;
      memcpy(dst, src, (size_t)(pPage->num * width));
    }

    pGroupResInfo->numOfDataPages += 1;

    done    += rows;
    pending -= rows;
  }

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3210 3211 3212
/*
 * Re-arm every output context for a new round of merging into pQuery->sdata[] and
 * mark every sdata buffer as empty.
 *
 * @param pRuntimeEnv runtime environment owning the query
 * @param pCtx        array of pQuery->numOfOutput function contexts to reset
 * @param pRow        result row whose cells become the contexts' resultInfo
 */
void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfOutput = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SQLFunctionCtx *pOne = &pCtx[col];

    // the output cursor is placed one row *before* the start of the buffer
    // (presumably the merge step advances it before writing -- confirm against doMerge)
    pOne->aOutputBuf  = pQuery->sdata[col]->data - pOne->outputBytes;
    pOne->startOffset = 0;
    pOne->size        = 1;
    pOne->resultInfo  = getResultCell(pRuntimeEnv, pRow, col);

    pQuery->sdata[col]->num = 0;
  }
}

3223 3224 3225 3226
/*
 * Flip the scan range and cursor of one table so that it can be scanned in the
 * opposite direction. A NULL pTableQueryInfo is ignored.
 *
 * The caller is expected to have switched pQuery->order.order already; the step
 * used below is derived from that (already switched) order.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO: the relation between win.ekey and (lastKey + step) used to be asserted here; validate and restore.

  // lastKey == win.skey means the previous scan produced nothing: leave the window untouched
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  // reverse the window and restart from its (new) start key
  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * For every closed window in pWindowResInfo, decide per output column whether the
 * function still has work to do:
 *   - first/first_dst with ascending order, or last/last_dst with descending order:
 *     the cell is marked not complete;
 *   - every other function except TS and TAG: the cell is marked complete;
 *   - TS and TAG cells are left as they are.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!getTimeWindowResStatus(pWindowResInfo, w)) {
      continue;  // windows that are not closed are skipped
    }

    SResultRow *pRow = getWindowResult(pWindowResInfo, w);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast  = (functId == TSDB_FUNC_LAST  || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pRow->pCellInfo[col].complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pRow->pCellInfo[col].complete = true;
      }
    }
  }
}

3279 3280
/*
 * Set the 'complete' flag of every output function before a reverse scan.
 *
 * Group-by-normal-column and interval queries are handled per window by
 * disableFuncInReverseScanImpl. Otherwise the flag is set directly on each context's
 * resultInfo: first/first_dst (ascending) and last/last_dst (descending) are marked
 * not complete, every other function except TS and TAG is marked complete.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple result of a table query (todo refactor)
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    if (pCtx->resultInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    int32_t functId = pQuery->pSelectExpr[col].base.functionId;

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (functId == TSDB_FUNC_LAST  || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Walk every table of every table group, flip its query range for the reverse scan
 * and copy the resulting lastKey into the matching STableKeyInfo entry.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t groupCount = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pQueryInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyInfoList   = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t tableCount = taosArrayGetSize(pQueryInfoList);
    for (int32_t k = 0; k < tableCount; ++k) {
      STableQueryInfo *pInfo = taosArrayGetP(pQueryInfoList, k);
      updateTableQueryInfoForReverseScan(pQuery, pInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyInfoList, k);
      pKeyInfo->lastKey = pInfo->lastKey;

      // both lists are expected to describe the same table at the same index
      assert(pInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3330
/* Apply SWITCH_ORDER to the 'order' field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
  }
}

H
Haojun Liao 已提交
3337 3338
/*
 * Initialise a result row in place: the cell-info array is taken to start right
 * after the SResultRow header in the same memory chunk, and the row is marked as
 * not yet bound to any page/row of the result buffer (pageId = rowId = -1).
 *
 * @param pQuery     not used by the current implementation
 * @param pResultRow row to initialise; the caller must have reserved room for the cells behind it
 * @return always TSDB_CODE_SUCCESS
 */
int32_t createQueryResultInfo(SQuery *pQuery, SResultRow *pResultRow) {
  (void) pQuery;

  pResultRow->rowId  = -1;
  pResultRow->pageId = -1;

  // cells live immediately behind the row header
  pResultRow->pCellInfo = (SResultRowCellInfo *)((char *) pResultRow + sizeof(SResultRow));

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every output context back at the start of its pQuery->sdata[] buffer, reset
 * and re-attach the per-column result info kept in the runtime environment, zero the
 * output buffers, and finally re-run the function initialisers.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    pCtx->resultInfo = &pRuntimeEnv->resultInfo[col];
    RESET_RESULT_INFO(pCtx->resultInfo);

    // top/bottom/diff write their timestamps through the first output column
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output cursors by 'output' rows and reset each context's result info.
 *
 * @param pRuntimeEnv runtime environment
 * @param output      number of rows to move forward
 *
 * Must not be used when the query contains a DIFF function (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // only functions flagged as "outer forward" have their output position moved here
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the timestamp values of the first output column are
     * written by the top/bottom routine itself through ptsOutputBuf, so that buffer
     * has to be forwarded as well.
     *
     * diff function is handled in multi-output function
     */
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every output context to 0 and invoke the function's init
 * callback for each context whose result info has not been initialised yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->currentStage = 0;

    SResultRowCellInfo *pCell = GET_RES_INFO(pCtx);
    if (!pCell->initialized) {
      int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
      aAggs[functionId].init(pCtx);
    }
  }
}

3421
/*
 * Apply the remaining OFFSET (pQuery->limit.offset) to the rows currently in the
 * output buffers.
 *
 * - nothing to do when there are no rows or no offset left;
 * - when all current rows fall inside the offset they are dropped, the offset is
 *   reduced accordingly, the output contexts are reset and QUERY_RESBUF_FULL is cleared;
 * - otherwise the first 'offset' rows are removed by shifting the remaining rows to
 *   the front of each column buffer, and the offset becomes 0.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // every row produced so far is skipped
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only a prefix of the current rows is skipped
  int64_t skipped = pQuery->limit.offset;
  pQuery->limit.offset = 0;
  pQuery->rec.rows -= skipped;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), skipped,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    int32_t bytes = pCtx->outputBytes;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[col]->data, (char*)pQuery->sdata[col]->data + bytes * skipped, (size_t)(pQuery->rec.rows * bytes));
    pCtx->aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update pQuery->status.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed into the
 * status after the QUERY_NOT_COMPLETED flag has been cleared, because that flag is
 * not compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Call xNextStep on every output function except TSDB_FUNC_TS using the contexts as
 * currently bound, and report whether at least one of them is not complete afterwards.
 * All functions are always stepped; there is no early exit.
 */
static bool stepAllFunctionsAndCheckPending(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    pending = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t functId = pQuery->pSelectExpr[col].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    aAggs[functId].xNextStep(pCtx);

    SResultRowCellInfo *pCell = GET_RES_INFO(pCtx);
    pending |= (!pCell->complete);
  }

  return pending;
}

/*
 * Move every output function to its next step and tell the caller whether the data
 * blocks have to be scanned again.
 *
 * For group-by-normal-column / interval queries this is done for each closed window
 * (after binding the contexts to that window); otherwise it is done once on the
 * current contexts.
 *
 * @return true when at least one function reports that it is not complete
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return stepAllFunctionsAndCheckPending(pRuntimeEnv);
  }

  bool            toContinue = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SResultRow *pRow = getWindowResult(pWindowResInfo, w);
    if (!pRow->closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pRow);
    toContinue |= stepAllFunctionsAndCheckPending(pRuntimeEnv);
  }

  return toContinue;
}

H
Haojun Liao 已提交
3517
/*
 * Take a snapshot of the query state at the beginning of a scan: the status, the
 * current window index, the query window, and the current table's window with its
 * start key replaced by 'start'.
 *
 * @param start scan start key; must not lie beyond the current table's lastKey in scan direction (asserted)
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pCurrent->lastKey);
  } else {
    assert(start >= pCurrent->lastKey);
  }

  SQueryStatusInfo snapshot = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
  };

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pCurrent->win);
  snapshot.curWindow.skey = start;

  return snapshot;
}

3538 3539 3540 3541
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * Saves the ts-buffer cursor into pStatus, reverses the ts buffer, the query window
 * and the query order, flags the environment as REVERSE_SCAN, prepares the function
 * contexts and per-table ranges, and opens a new secondary tsdb query handle for the
 * reversed window. Jumps out through pRuntimeEnv->env with terrno when the handle
 * cannot be created.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor, then turn the ts buffer around
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    // keep the call outside of assert() so that it is executed in release builds too
    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  // the condition is built from the already reversed order and window
  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3587 3588
/*
 * Undo setEnvBeforeReverseScan: switch the query/context order back, restore the
 * ts-buffer cursor saved in pStatus, flag the environment as MASTER_SCAN again and
 * restore lastKey, status and the time windows from the saved snapshot.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  // back to the original scan direction
  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the state captured before the reverse scan; the query window is reset to the saved window
  pQuery->status    = pStatus->status;
  pCurrent->lastKey = pStatus->lastKey;
  pCurrent->win     = pStatus->w;
  pQuery->window    = pCurrent->win;
}

H
Haojun Liao 已提交
3609 3610 3611 3612 3613 3614 3615
/*
 * Reset the lastKey of the single table in pTableGroupInfo to the start key of the
 * query condition. Only valid for a group info holding exactly one table (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray        *pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo *pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3616
/*
 * Scan the data blocks of the current table starting at 'start'.
 *
 * 1. Master scan, followed by repeat scans over the same range for as long as
 *    needScanDataBlocksAgain() asks for another pass. Each repeat scan uses a new
 *    secondary tsdb query handle.
 * 2. When the query requires it (needReverseScan), one additional scan in the
 *    opposite direction; the environment is restored afterwards.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when a query handle cannot be created
 * or when the query has been killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (saved.lastKey != pCurrent->lastKey) {
        saved.curWindow.ekey = pCurrent->lastKey - step;
      }

      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    // another pass over the range covered by the master scan
    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, saved.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3696
/*
 * Run the xFinalize callback of every output function.
 *
 * For group-by-normal-column / interval queries this is done once per closed window
 * (group-by-normal-column queries first close all windows), and the number of rows
 * produced for the window is stored in the window's result row. For all other
 * queries the callbacks are invoked once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SResultRow *pRow = pWindowResInfo->pResult[w];
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    // bind the contexts to this window before finalizing
    setWindowResOutputBuf(pRuntimeEnv, pRow);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Tell whether the select list contains at least one function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3744
/*
 * Initialise a STableQueryInfo inside caller-provided memory.
 *
 * @param pRuntimeEnv runtime environment (decides whether window results are needed)
 * @param pTable      table handle stored in the info
 * @param win         query window of the table; win.skey also becomes the initial lastKey
 * @param buf         memory in which the STableQueryInfo is built (not allocated here)
 * @return buf as STableQueryInfo*, or NULL when the window result info cannot be initialised
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable  = pTable;
  pInfo->win     = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // only interval/group-by queries need the window result info; other aggregate queries leave it untouched
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    int32_t initialSize = 16;
    int32_t initialThreshold = 100;

    int32_t code = initWindowResInfo(&pInfo->windowResInfo, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
    if (code != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3769
/*
 * Release the resources held inside a STableQueryInfo (its tag variant and its
 * window result info). The structure itself is not freed. NULL is accepted.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3781
 * @param pDataBlockInfo
3782
 */
H
Haojun Liao 已提交
3783
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3784
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3785 3786 3787
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3788 3789
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3790 3791 3792 3793

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3794

H
Haojun Liao 已提交
3795 3796 3797
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3798

H
Haojun Liao 已提交
3799
  uint64_t uid = 0; // uid is always set to be 0
H
Haojun Liao 已提交
3800
  SResultRow *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
3801
      sizeof(groupIndex), true, uid);
3802 3803 3804
  if (pWindowRes == NULL) {
    return;
  }
3805

3806 3807 3808 3809
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
3810
  if (pWindowRes->pageId == -1) {
3811
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3812 3813 3814 3815
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3816

H
Haojun Liao 已提交
3817 3818
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3819 3820 3821 3822
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3823
/*
 * Bind every output context to the given result row: the output buffer points into
 * the row's page of the result buffer and resultInfo points at the row's cell for
 * that column. top/bottom/diff additionally get the first column's output buffer as
 * their timestamp output buffer.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->pCellInfo[col];
  }
}

H
Haojun Liao 已提交
3846
/*
 * Bind every output context to the given result row and initialise the functions
 * that have not been initialised yet.
 *
 * resultInfo is always re-pointed at the row's cell. Columns whose cell is both
 * initialised and complete are otherwise left alone; for the remaining columns the
 * output buffer is bound, currentStage is reset to 0 and the init callback is
 * invoked when the cell is not initialised.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->pCellInfo[col];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->currentStage = 0;
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    int32_t functionId = pCtx->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3874
/*
 * Load the tag values of pTable into the function contexts and, when a ts buffer is
 * in use, position the ts buffer for this table.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the start position for the
 * table's tag is looked up in the ts buffer and remembered in pTableQueryInfo->cur;
 * on later visits the remembered cursor is restored.
 *
 * @return 0 in every case.
 * NOTE(review): the "tag not found in ts_comp" path does `return false`, which is 0 as
 * well, so callers cannot tell that failure apart from success -- confirm whether a
 * non-zero error code is intended before changing it.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  tVariant *pTag = &pRuntimeEnv->pCtx[0].tag;

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (elem.vnode < 0) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return false;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  }

  // both paths report the position that is now active
  if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3930
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3931 3932
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3933
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3934

3935 3936 3937
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3938
    pTableQueryInfo->win.skey = key;
3939
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3940

3941 3942 3943 3944 3945
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3946

3947 3948 3949 3950 3951 3952
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3953
    STimeWindow     w = TSWINDOW_INITIALIZER;
3954
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3955

H
Haojun Liao 已提交
3956 3957
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3958
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3959
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3960

3961 3962
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3963
        assert(win.ekey == pQuery->window.ekey);
3964
      }
3965

3966
      pWindowResInfo->prevSKey = w.skey;
3967
    }
3968

3969
    pTableQueryInfo->queryRangeSet = 1;
3970
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3971 3972 3973 3974
  }
}

/*
 * Check whether any output expression of the query uses a function whose aAggs[] entry
 * carries the TSDB_FUNCSTATE_NEED_TS status bit.
 *
 * @return true if at least one such function is present, false otherwise
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    idx += 1;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column has to be loaded for a data block.
 *
 * It is required when one of the following holds:
 *  1. the lastKey of the current table lies inside the block's time window;
 *  2. the end key of the query window lies inside the block's time window;
 *  3. one of the output functions needs timestamps (see requireTimestamp()).
 *
 * @param pQuery          query description; pQuery->current is the table being scanned
 * @param pDataBlockInfo  block whose time window is tested
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  const STimeWindow *pBlockWin = &pDataBlockInfo->window;
  TSKEY              lastKey = pQuery->current->lastKey;

  if (lastKey >= pBlockWin->skey && lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3998
/*
 * Copy rows of closed window results into the query output buffers (pQuery->sdata).
 *
 * Copying resumes from pQInfo->groupIndex / pQInfo->groupResInfo.rowId and stops when either all
 * closed windows have been visited or the output buffer (pQuery->rec.capacity rows) is full. When a
 * window result does not fit completely, rowId remembers how many of its rows were already copied.
 *
 * @param pQInfo       query object (source of the resume position and the output buffers)
 * @param pResultInfo  window results to copy from
 * @param orderType    TSDB_ORDER_ASC walks the results forward; any other value walks them backward
 * @return number of rows placed into the output buffers by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t      numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SResultRow **rows = pResultInfo->pResult;

  // forward walk starts at groupIndex, backward walk starts groupIndex entries before the end
  bool    forward = (orderType == TSDB_ORDER_ASC);
  int32_t stride = forward ? 1 : -1;
  int32_t idx = forward ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  SGroupResInfo *pGroupRes = &pQInfo->groupResInfo;
  int32_t        copied = 0;

  for (; idx >= 0 && idx < numOfClosed; idx += stride) {
    SResultRow *pRow = rows[idx];

    // empty result: nothing to copy, move on to the next one
    if (pRow->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupRes->rowId = 0;
      continue;
    }

    int32_t firstRow = pGroupRes->rowId;
    int32_t toCopy = pRow->numOfRows - pGroupRes->rowId;

    if (toCopy <= pQuery->rec.capacity - copied) {
      // the rest of this result fits: it is finished after this copy
      pGroupRes->rowId = 0;
      pQInfo->groupIndex += 1;
    } else {
      // not enough room: copy what fits and remember where to resume
      toCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupRes->rowId += toCopy;
    }

    tFilePage *pPage = getResBufPage(pEnv->pResultBuf, pRow->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pEnv, col, pRow, pPage);
      memcpy(dst, src + firstRow * width, width * toCopy);
    }

    copied += toCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4075
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4076
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4077

4078
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4079
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4080

4081
  pQuery->rec.rows += numOfResult;
4082

4083
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4084 4085
}

H
Haojun Liao 已提交
4086
/*
 * For non-interval queries, raise numOfRows of every window result in pRuntimeEnv->windowResInfo
 * to the largest numOfRes found among its output cells. Cells that belong to TSDB_FUNC_TS,
 * TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ are ignored. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWinInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWinInfo->size; ++w) {
    SResultRow *pRow = pWinInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool ignored = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
      if (!ignored) {
        pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pRow->pCellInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4108
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4109
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4110
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4111
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4112

4113
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4114
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4115

H
Haojun Liao 已提交
4116
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4117
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4118
  } else {
4119
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4120 4121 4122
  }
}

H
Haojun Liao 已提交
4123
/*
 * Tell whether a table query still has results that were not handed out yet.
 *
 * - If a positive limit is set and rec.total already reached it, there is nothing left.
 * - Fill queries (fillType != TSDB_FILL_NONE and not a point-interpolation query): results remain if
 *   the fill info still holds rows, or, once the query is completed, if getFilledNumOfRes() reports
 *   rows up to the end of the query window. While the query is not completed and the fill info is
 *   drained, the answer is false.
 * - Other queries: results remain only if the query is completed, it is a group-by-normal-column or
 *   interval query, and the window result list is not empty.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // completed query with window results that are waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // rows already generated but not returned yet must be delivered first
  int32_t pending = taosNumOfRemainRows(pFillInfo);
  if (pending > 0) {
    return true;
  }

  // nothing pending: only a completed query can still produce filled rows here
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the current result set into the response buffer 'data'.
 *
 * Layout written to 'data':
 *   1. for each output column, numOfRows values of pSelectExpr[col].bytes bytes each, column after column;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer in network byte order;
 *   3. one STableIdInfo per entry with uid/tid/key converted to big-endian.
 *
 * Afterwards, if the query status is QUERY_COMPLETED, the status is advanced to QUERY_OVER when no
 * further results are pending (super table: IS_STASBLE_QUERY_OVER; table: no remaining results).
 *
 * The position right after the column data depends on arbitrary column widths and row counts, so it is
 * not guaranteed to be aligned for int32_t or STableIdInfo. All multi-byte values are therefore built
 * in properly typed locals and copied with memcpy() instead of being stored through cast pointers.
 *
 * @param pQInfo     query object
 * @param numOfRows  number of rows to copy from every output column
 * @param data       destination buffer; the caller must provide enough room for the layout above
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // compute the length in size_t so that bytes * rows cannot overflow int
    size_t len = (size_t)pQuery->pSelectExpr[col].bytes * (size_t)numOfRows;

    memmove(data, pQuery->sdata[col]->data, len);
    data += len;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);

  int32_t netNumOfTables = (int32_t)htonl((uint32_t)numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(netNumOfTables);

  for (int32_t i = 0; i < numOfTables; i++) {
    const STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst = {0};
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(dst);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4200
/*
 * Generate filled result rows and apply the remaining OFFSET of the query to them.
 *
 * Rows are generated batch by batch (at most pQuery->rec.capacity rows per batch):
 *  - offset == 0: the batch is returned as is;
 *  - offset < batch size: the first 'offset' rows are dropped by shifting the column data in pDst
 *    to the front, the offset is cleared and the number of surviving rows is returned;
 *  - otherwise the whole batch is discarded, the offset is reduced by the batch size and generation
 *    continues as long as queryHasRemainResForTableQuery() reports more results; 0 is returned when
 *    nothing is left.
 *
 * NOTE(review): rows are generated into pQuery->sdata while the offset shift operates on pDst, so the
 * function appears to rely on callers passing pQuery->sdata as pDst - confirm against the callers.
 * NOTE(review): numOfFilled is not used by this function.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         column pages that receive the rows surviving the offset
 * @param numOfFilled  unused
 * @return number of rows available in the output after applying the offset
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  for (;;) {
    int32_t ret = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* no offset left to consume: hand the batch out immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset >= ret) {
      // the complete batch is swallowed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
          pQuery->limit.offset - ret);

      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    // only the leading part of the batch is swallowed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

    ret -= (int32_t)pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    // todo refactor move to the beginning of buffer
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      char *base = pDst[col]->data;
      memmove(base, base + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
              ret * pQuery->pSelectExpr[col].bytes);
    }

    pQuery->limit.offset = 0;
    return ret;
  }
}

4245
/*
 * Finalize the cost summary of a query and write it to the debug log.
 *
 * Updates in pQInfo->runtimeEnv.summary:
 *  - hashSize: memory size of the window hash table plus the table-info hash map;
 *  - elapsedTime: increased by firstStageMergeTime;
 *  - winInfoSize / numOfTimeWindows: taken from the window result pool.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pSummary = &pQInfo->runtimeEnv.summary;

  uint64_t hashSize = taosHashGetMemSize(pQInfo->runtimeEnv.pWindowHashTable);
  hashSize += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pSummary->hashSize = hashSize;

  // the merge time counts towards the total elapsed time
  pSummary->elapsedTime += pSummary->firstStageMergeTime;

  SWindowResultPool *pPool = pQInfo->runtimeEnv.pool;
  pSummary->winInfoSize = getWindowResultPoolMemSize(pPool);
  pSummary->numOfTimeWindows = getNumOfAllocatedWindowResult(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->firstStageMergeTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pSummary->winInfoSize/1024.0,
      pSummary->numOfTimeWindows, pSummary->tableInfoSize/1024.0, pSummary->hashSize/1024.0);
}

4269 4270
/*
 * Consume the remaining OFFSET of the query inside the given data block.
 *
 * - offset == number of rows in the block: the block is skipped entirely; lastKey of the current
 *   table is moved one step past the block edge in scan direction and the offset is cleared.
 * - otherwise pQuery->pos is set to the first row that survives the offset, the block data is loaded,
 *   lastKey is set to the timestamp at that row, the offset is cleared and the query functions are
 *   applied to the block.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   description of the block the query handle is currently positioned on
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTable = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset covers exactly this block: skip it as a whole
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pTable->lastKey = edge + step;
    pQuery->limit.offset = 0;
    return;
  }

  int32_t skipped = (int32_t)pQuery->limit.offset;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = skipped;
  } else {
    pQuery->pos = pBlockInfo->rows - skipped - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // column 0 of the retrieved block is read as the timestamp column
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  pTable->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4304

4305 4306 4307 4308 4309
/*
 * Skip the rows covered by the OFFSET clause by walking over data blocks.
 *
 * Nothing is done when there is no positive offset or when the query has filter columns. Blocks that
 * hold fewer rows than the remaining offset are skipped without being loaded; the first block that
 * can absorb the remaining offset is handed to updateOffsetVal() and the walk stops.
 *
 * The function leaves via longjmp(pRuntimeEnv->env, ...) when the query has been killed, or when
 * terrno is not TSDB_CODE_SUCCESS after the walk.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pTable = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the remaining offset ends inside (or exactly at the end of) this block
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is covered by the offset: skip it and move lastKey past its edge
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY edge = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pTable->lastKey = edge + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4343

H
Haojun Liao 已提交
4344
/*
 * Apply the OFFSET clause of an interval query by skipping time windows instead of rows.
 *
 * *start is initialised with the lastKey of the current table. Nothing is skipped when there is no
 * positive offset, or when the query uses filter columns, a timestamp buffer or fill.
 *
 * Otherwise data blocks are visited one after another. For every block the active time window is
 * determined and the offset is decremented once for each window that ends inside the block. When the
 * offset reaches zero:
 *   - if the next window begins in the current block, the block is loaded, the query start
 *     (table win.skey, pQuery->window.skey, *start) is set to the first timestamp of that window and
 *     the query functions are applied to the block;
 *   - otherwise the query start is set to the start of the next window.
 * While the offset is still positive, the walk continues inside the block as long as the next window
 * begins in it; otherwise the next block is fetched.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) when terrno is set after all blocks were visited.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        out: timestamp from which the query continues
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  bool nothingToSkip = pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 ||
                       pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL;
  if (nothingToSkip) {
    return true;
  }

  /*
   * Windows are skipped one at a time because holes may exist in the data:
   * interval * offset is not a valid shortcut for the new start position.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTable = pQuery->current;

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blk = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blk);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blk.window.ekey, pQuery->window.ekey, blk.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blk.window.skey, blk.window.skey, pQuery->window.ekey, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window of this block
    STimeWindow cur = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      const bool asc = QUERY_IS_ASC_QUERY(pQuery);

      // a window that ends inside this block consumes one unit of the offset
      bool curEndsHere = asc ? (cur.ekey <= blk.window.ekey) : (cur.ekey >= blk.window.skey);
      if (curEndsHere) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = cur.skey;
      }

      STimeWindow next = cur;
      GET_NEXT_TIMEWINDOW(pQuery, &next);

      bool offsetDone = (pQuery->limit.offset == 0);
      bool nextBeginsHere = asc ? (next.skey <= blk.window.ekey) : (next.ekey >= blk.window.skey);

      if (!nextBeginsHere) {
        if (offsetDone) {
          // the query continues with the next window, which lies outside this block
          *start = next.skey;
          pQuery->window.skey = next.skey;
          pWinInfo->prevSKey = next.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next window still touches the current block: load the block and locate the first row
       * of the next qualified window. Only column 0 (read as timestamps) is inspected here.
       * TODO optimize performance: the block is loaded even if only the timestamps are needed.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      next = cur;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &blk, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      TSKEY firstTs = ((TSKEY *)pTsCol->pData)[startPos];

      if (!offsetDone) {
        // more windows have to be skipped: advance inside the block
        pTable->lastKey = firstTs;
        pWinInfo->prevSKey = next.skey;
        cur = next;
        continue;
      }

      // offset exhausted: reset the query start timestamp and process the rest of this block
      pTable->win.skey = firstTs;
      pQuery->window.skey = pTable->win.skey;
      *start = pTable->win.skey;

      pWinInfo->prevSKey = next.skey;
      int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blk, NULL, binarySearchForKey, pDataBlock);
      pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blk.window.skey, blk.window.ekey, blk.rows, numOfRes, pQuery->current->lastKey);

      return true;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4470 4471
// Forward declaration of a file-local (static) helper; its definition is not in this part of the file.
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4472
/*
 * Create the TSDB query handle (pQInfo->runtimeEnv.pQueryHandle) for a query.
 *
 * No handle is created, and TSDB_CODE_SUCCESS is returned, when the query only reads tags, or when it
 * is a super table query that is neither an interval query nor a fixed-output query.
 *
 * The query condition is built from the order, column list and time window of the query. For a
 * single-table, ascending, non-interval, non-group-by-column, non-fixed-output query the time window
 * of that table is used instead of the query window.
 *
 * Depending on the query type the handle is obtained from tsdbQueryLastRow(),
 * tsdbQueryRowsInExternalWindow() or tsdbQueryTables(). For the last-row case the query window is
 * replaced by the window of the condition afterwards and propagated (win and lastKey) to every table
 * of every group; if the table group info holds no tables, the table-query-info count is reset to 0.
 *
 * @param tsdb           TSDB instance passed through to the tsdbQuery* functions
 * @param pQInfo         query object
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS, or the value of terrno after creating the handle
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !isFixedOutputQuery(pEnv)) {
      return TSDB_CODE_SUCCESS;
    }
  }

  STsdbQueryCond cond = {
    .order   = pQuery->order.order,
    .colList = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  bool useTableWindow = !isSTableQuery
                        && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
                        && (cond.order == TSDB_ORDER_ASC)
                        && (!QUERY_IS_INTERVAL_QUERY(pQuery))
                        && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
                        && (!isFixedOutputQuery(pEnv));
  if (useTableWindow) {
    SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    } else {
      pEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    }

    return terrno;
  }

  pEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the updated query window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTable = taosArrayGetP(pGroup, k);

      pTable->win = pQuery->window;
      pTable->lastKey = pTable->win.skey;
    }
  }

  return terrno;
}

4536 4537 4538
/*
 * Build the column description array used by the fill module, one entry per output column.
 *
 * Each entry takes bytes, type and function id from the matching select expression, the fill value
 * from pQuery->fillVal, and an offset equal to the sum of the widths of all preceding columns. Every
 * column is flagged TSDB_COL_NORMAL.
 *
 * @return array of pQuery->numOfOutput entries allocated with calloc(), or NULL if the allocation
 *         failed
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo *pCols = calloc(numOfCols, sizeof(SFillColInfo));
  if (pCols == NULL) {
    return NULL;
  }

  int32_t rowOffset = 0;
  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo    *pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo *pCol = &pCols[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    rowOffset += pExpr->bytes;
  }

  return pCols;
}

4561
/*
 * Initialise the runtime environment of a query object.
 *
 * Steps, in order:
 *  1. cache query properties (top/bottom query, tag output) and apply the scan limitation;
 *  2. create the TSDB query handle (setupQueryHandle());
 *  3. store tsdb/vgId and basic runtime fields; set the traverse order of the timestamp buffer if any;
 *  4. set up the runtime environment (setupQueryRuntimeEnv());
 *  5. create the disk-based result buffer and the window result info:
 *       - super table query that does not only read tags: always a result buffer; window result info
 *         only for non-interval queries;
 *       - otherwise, group-by-normal-column or interval query: result buffer and window result info;
 *  6. create the fill info for fill queries that are not point-interpolation queries;
 *  7. mark the query as QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query object to initialise
 * @param pTsBuf         optional timestamp buffer (may be NULL)
 * @param tsdb           TSDB instance
 * @param vgId           vnode group id stored in the query object
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS, or the error code of the first step that failed
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t           code = TSDB_CODE_SUCCESS;

  pEnv->topBotQuery = isTopBottomQuery(pQuery);
  pEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pEnv->pQuery = pQuery;
  pEnv->pTSBuf = pTsBuf;
  pEnv->cur.vgroupIndex = -1;
  pEnv->stableQuery = isSTableQuery;
  pEnv->prevGroupId = INT32_MIN;
  pEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = TSDB_ORDER_DESC;
    if (pQuery->order.order == pEnv->pTSBuf->tsOrder) {
      order = TSDB_ORDER_ASC;
    }

    tsBufSetTraverseOrder(pEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t pageSize = DEFAULT_PAGE_SIZE;
  int32_t rowSize = 0;
  getIntermediateBufInfo(pEnv, &pageSize, &rowSize);

  int32_t twoMiB = 1024*1024*2;

  if (isSTableQuery && !onlyQueryTags(pEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pEnv->pResultBuf, rowSize, pageSize, twoMiB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t keyType = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pEnv->groupbyNormalCol) {  // group by columns not tags;
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        keyType = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pEnv->windowResInfo, 8, threshold, keyType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pEnv, &pageSize, &rowSize);

    code = createDiskbasedResultBuffer(&pEnv->pResultBuf, rowSize, pageSize, twoMiB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t keyType = TSDB_DATA_TYPE_NULL;
    if (pEnv->groupbyNormalCol) {
      keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      keyType = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pEnv->windowResInfo, numOfResultRows, 1024, keyType);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // NOTE(review): taosCreateFillColInfo() returns NULL when its allocation fails; the result is
    // passed on unchecked here - confirm how taosInitFillInfo() deals with a NULL column array.
    SFillColInfo *pColInfo = taosCreateFillColInfo(pQuery);
    STimeWindow   aligned = TSWINDOW_INITIALIZER;

    TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &aligned);

    pEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, aligned.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                       pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                       pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4667
/*
 * Clear the "complete" flag of the result cell of every output column, so the
 * output functions are allowed to run again when the next table is processed.
 * Output columns without a result cell are left untouched.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    col += 1;

    if (pCell == NULL) {
      continue;
    }

    pCell->complete = false;
  }
}

H
Haojun Liao 已提交
4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694
/*
 * Set up the execution environment for one data block of one table.
 *
 * - non-interval query: setExecutionContext() is invoked with the table's group
 *   index and the key right behind the block in scan direction (ekey + step).
 * - interval query: the interval query range is set from the block's start key;
 *   setAdditionalInfo() is invoked as well when tag results are required or a
 *   ts-comp buffer is attached.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    bool needAdditionalInfo = (pEnv->hasTagResults || pEnv->pTSBuf != NULL);
    if (needAdditionalInfo) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + direction);
}

H
Haojun Liao 已提交
4695
/*
 * Walk through all data blocks delivered by the active query handle and apply
 * the output functions on each of them via stableApplyFunctionsOnBlock().
 *
 * The primary query handle is used during the master scan, the secondary
 * handle otherwise. The scan stops early when a block belongs to a table that
 * is not found in tableqinfoGroupInfo.map, or when loadDataBlockOnDemand()
 * does not return success. If the query has been killed, or terrno holds an
 * error once the loop is finished, the function is left through longjmp() on
 * pRuntimeEnv->env.
 *
 * @param pQInfo  query information
 * @return        difference of taosGetTimestampMs() taken at exit and at entry
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // look up the per-table query info by the table id of the block
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    STableQueryInfo *pTableInfo = *ppTableInfo;
    pQuery->current = pTableInfo;

    // sanity check: table window orientation, last key and enclosing query window must agree with the scan order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert((pTableInfo->win.skey <= pTableInfo->win.ekey) &&
             (pTableInfo->lastKey >= pTableInfo->win.skey) &&
             (pTableInfo->win.skey >= pQuery->window.skey && pTableInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert((pTableInfo->win.skey >= pTableInfo->win.ekey) &&
             (pTableInfo->lastKey <= pTableInfo->win.skey) &&
             (pTableInfo->win.skey <= pQuery->window.skey && pTableInfo->win.ekey >= pQuery->window.ekey));
    }

    // the per-block environment is not prepared here for group-by-normal-column queries
    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pTableInfo, &blockInfo);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pHandle, &blockInfo, &pStatis,
                                         &pDataBlock, &blockStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // the block is skipped as a whole, only move the last key of the table behind it
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }

      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%" PRId64 ", tid:%d, brange:%" PRId64 "-%" PRId64
           ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  // an error recorded in terrno aborts the query
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4770 4771
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4772
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4773

4774
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
4775
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
4776
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
4777

H
Haojun Liao 已提交
4778 4779 4780
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
4781

H
Haojun Liao 已提交
4782
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
4783
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
4784
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
4785

4786
  STsdbQueryCond cond = {
4787
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
4788 4789
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
4790
      .numOfCols = pQuery->numOfCols,
4791
  };
4792

H
hjxilinx 已提交
4793
  // todo refactor
4794
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
4795 4796 4797 4798
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
4799

4800
  taosArrayPush(g1, &tx);
4801
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
4802

4803
  // include only current table
4804 4805 4806 4807
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
4808

H
Haojun Liao 已提交
4809
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
4810 4811
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
4812 4813 4814
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
4815

4816
  if (pRuntimeEnv->pTSBuf != NULL) {
H
Haojun Liao 已提交
4817 4818
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

4819
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
4820
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4821
      // failed to find data with the specified tag value and vnodeId
4822
      if (elem.vnode < 0) {
H
Haojun Liao 已提交
4823 4824 4825 4826 4827 4828
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

4829
        return false;
H
Haojun Liao 已提交
4830 4831
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4832 4833 4834 4835 4836 4837 4838 4839

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
4840 4841
      }
    } else {
H
Haojun Liao 已提交
4842
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4843
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
4844

H
Haojun Liao 已提交
4845
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4846
        // failed to find data with the specified tag value and vnodeId
H
Haojun Liao 已提交
4847
        if (elem1.vnode < 0) {
H
Haojun Liao 已提交
4848 4849 4850 4851 4852
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
4853

H
Haojun Liao 已提交
4854
          return false;
H
Haojun Liao 已提交
4855 4856
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4857 4858 4859 4860 4861
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
4862
        }
H
Haojun Liao 已提交
4863

H
Haojun Liao 已提交
4864 4865
      } else {
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
H
Haojun Liao 已提交
4866
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4867 4868 4869 4870 4871
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
4872
      }
4873 4874
    }
  }
4875

4876
  initCtxOutputBuf(pRuntimeEnv);
4877 4878 4879 4880 4881 4882 4883 4884 4885 4886
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4887
static void sequentialTableProcess(SQInfo *pQInfo) {
4888
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4889
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4890
  setQueryStatus(pQuery, QUERY_COMPLETED);
4891

H
Haojun Liao 已提交
4892
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4893

H
Haojun Liao 已提交
4894
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4895 4896
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4897

4898
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4899
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4900

S
TD-1057  
Shengliang Guan 已提交
4901
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4902
             numOfGroups, group);
H
Haojun Liao 已提交
4903 4904 4905 4906 4907 4908 4909

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4910 4911
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4912 4913 4914
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4915

H
Haojun Liao 已提交
4916 4917 4918 4919 4920 4921 4922
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4923

4924
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4925
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4926
      } else {
H
Haojun Liao 已提交
4927
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4928
      }
B
Bomin Zhang 已提交
4929 4930 4931 4932 4933 4934

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4935

H
Haojun Liao 已提交
4936
      initCtxOutputBuf(pRuntimeEnv);
4937

4938
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4939
      assert(taosArrayGetSize(s) >= 1);
4940

4941
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4942 4943 4944
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4945

dengyihao's avatar
dengyihao 已提交
4946
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4947

H
Haojun Liao 已提交
4948
      // here we simply set the first table as current table
4949 4950 4951
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4952
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4953

H
Haojun Liao 已提交
4954 4955 4956 4957 4958
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4959

H
Haojun Liao 已提交
4960 4961 4962 4963 4964
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4965 4966 4967 4968 4969 4970

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4971
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4972
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4973
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4974

S
TD-1057  
Shengliang Guan 已提交
4975
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4976 4977 4978 4979 4980 4981 4982

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4983 4984
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4997
      // no need to update the lastkey for each table
4998
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4999

B
Bomin Zhang 已提交
5000 5001
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5002 5003 5004
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5005

5006
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5007 5008
      assert(taosArrayGetSize(s) >= 1);

5009
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5010 5011 5012 5013 5014 5015 5016 5017

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
5018
      taosArrayDestroy(s);
5019 5020 5021 5022 5023
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5024
        pWindowResInfo->pResult[i]->closed = true; // enable return all results for group by normal columns
5025

H
Haojun Liao 已提交
5026
        SResultRow *pResult = pWindowResInfo->pResult[i];
5027
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
H
Haojun Liao 已提交
5028
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->pCellInfo[j].numOfRes));
5029 5030 5031
        }
      }

5032
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5033 5034 5035 5036 5037 5038 5039
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5040
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5041 5042 5043 5044 5045 5046

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5047 5048 5049
    }
  } else {
    /*
5050
     * 1. super table projection query, 2. ts-comp query
5051 5052 5053
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5054
    if (pQInfo->groupIndex > 0) {
5055
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5056
      pQuery->rec.total += pQuery->rec.rows;
5057

5058
      if (pQuery->rec.rows > 0) {
5059 5060 5061
        return;
      }
    }
5062

5063
    // all data have returned already
5064
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5065 5066
      return;
    }
5067

5068 5069
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5070

H
Haojun Liao 已提交
5071
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5072 5073
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5074

5075
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5076
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5077
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5078
      }
5079

5080
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5081
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5082
        pQInfo->tableIndex++;
5083 5084
        continue;
      }
5085

H
hjxilinx 已提交
5086
      // TODO handle the limit offset problem
5087
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5088 5089
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5090 5091 5092
          continue;
        }
      }
5093

5094
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5095
      skipResults(pRuntimeEnv);
5096

5097
      // the limitation of output result is reached, set the query completed
5098
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5099
        SET_STABLE_QUERY_OVER(pQInfo);
5100 5101
        break;
      }
5102

5103 5104
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5105

5106
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5107 5108 5109 5110 5111 5112
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5113
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5114

H
Haojun Liao 已提交
5115
        STableIdInfo tidInfo = {0};
5116

H
Haojun Liao 已提交
5117 5118 5119
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5120
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5121 5122
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5123
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5124
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5125 5126
          break;
        }
5127

H
Haojun Liao 已提交
5128 5129 5130 5131
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5132
      } else {
5133
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5134 5135
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5136 5137
          continue;
        } else {
5138 5139 5140
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5141 5142 5143
        }
      }
    }
H
Haojun Liao 已提交
5144

5145
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5146 5147
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5148
  }
5149

5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5162
    finalizeQueryResult(pRuntimeEnv);
5163
  }
5164

5165 5166 5167
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5168

5169
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5170 5171
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5172
      pQuery->limit.offset);
5173 5174
}

5175 5176 5177 5178
/*
 * Switch the runtime environment of a multi-table query to the reverse scan.
 *
 * The scan flag is set to REVERSE_SCAN, the query window boundaries are swapped
 * and the scan order (of the query, of the ts-comp buffer cursor if present,
 * and of the function contexts) is switched. Afterwards a secondary query
 * handle over all table groups is created for the swapped window.
 *
 * Leaves through longjmp() on pRuntimeEnv->env if the secondary handle cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // the query condition is built after the switch, so it carries the reversed order and window
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer so that no stale handle is visible
  // to the code that runs before the new handle is assigned below
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5212 5213 5214 5215
/*
 * Switch the runtime environment back after the reverse scan: the query window
 * boundaries are swapped again, the order of the ts-comp buffer cursor (if a
 * buffer is attached) and of the function contexts is switched, and the scan
 * flag is set to MASTER_SCAN.
 *
 * NOTE(review): doSaveContext() also applies SWITCH_ORDER to pQuery->order.order,
 * which is not switched back here -- confirm that this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  STimeWindow      *pWin = &pEnv->pQuery->window;

  SWAP(pWin->skey, pWin->ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5226 5227 5228
/*
 * Close all time windows once a scan is finished.
 *
 * For an interval query the window result info of every table in every table
 * group is closed; otherwise only the window result info of the runtime
 * environment (the group results) is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pTableList = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(pTableList);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTableList, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables: master scan, optional reverse scan,
 * then copy the results into the output buffer.
 *
 * If pQInfo->groupIndex is larger than 0 no scan is performed; only the results
 * are copied into the output buffer. A query error or a killed query leaves
 * the function through longjmp() on pRuntimeEnv->env with
 * TSDB_CODE_TSC_QUERY_CANCELLED.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * the scan has been performed by a previous invocation already,
     * only the data need to be copied into the output buffer
     */
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  bool abortAfterMasterScan = (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo));
  if (abortAfterMasterScan) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    // switch to the reverse scan environment, scan, and switch back
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  bool abortAfterAllScans = (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo));
  if (abortAfterAllScans) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    // TODO finalizeQueryResult is not invoked here, since it may cause SIGSEGV if the memory is not
    // allocated yet; a dedicated cleanup function should be added instead
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !isSumAvgRateQuery(pQuery)) {
    // neither an interval query nor a sum/avg rate query
    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5325
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5326
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5327

H
hjxilinx 已提交
5328
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5329
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5330 5331
    return;
  }
5332

H
hjxilinx 已提交
5333
  pQuery->current = pTableInfo;  // set current query table info
5334

5335
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5336
  finalizeQueryResult(pRuntimeEnv);
5337

H
Haojun Liao 已提交
5338
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5339
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5340
  }
5341

H
Haojun Liao 已提交
5342
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5343
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5344

5345
  skipResults(pRuntimeEnv);
5346
  limitResults(pRuntimeEnv);
5347 5348
}

H
hjxilinx 已提交
5349
/*
 * Execute a multi-output query on one table: scan repeatedly until at least
 * one row is produced or the query reports QUERY_COMPLETED, then apply the
 * limit and record the table position when the query is complete.
 *
 * pQInfo     - query handle
 * pTableInfo - the table to scan; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->current = pTableInfo;

  // the output buffer must not be re-initialized for a ts_comp query
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pEnv);
  }

  // skip blocks without loading the actual data from file when no filter condition is present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQuery->current->lastKey);
    finalizeQueryResult(pEnv);

    pQuery->rec.rows = getNumOfResult(pEnv);

    // with filter columns the offset is consumed from the produced rows
    if (pQuery->rec.rows > 0 && pQuery->numOfFilterCols > 0 && pQuery->limit.offset > 0) {
      skipResults(pEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) || pQuery->rec.rows > 0) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    // nothing usable was produced in this round: clear the output and scan again
    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // append the id of the current table together with its last key to arrTableIdInfo
    STableId    *pId = TSDB_TABLEID(pQuery->current->pTable);
    STableIdInfo entry;

    entry.uid = pId->uid;
    entry.tid = pId->tid;
    entry.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &entry);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5409
/*
 * Scan the current table starting from `start` until the query status contains
 * QUERY_COMPLETED or QUERY_RESBUF_FULL. After each scan the result is
 * finalized and, when applicable, closed time windows are dropped to consume
 * the pending offset.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // without interpolation the leading records can simply be ignored
    // todo handle offset, in case of top/bottom interval query
    bool hasFilterOrTsBuf = (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (hasFilterOrTsBuf && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      // drop as many closed windows as the remaining offset allows
      int32_t closed  = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t dropped = (int32_t)(MIN(closed, pQuery->limit.offset));

      clearFirstNTimeWindow(pRuntimeEnv, dropped);
      pQuery->limit.offset -= dropped;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5436
// handle time interval query on table
H
hjxilinx 已提交
5437
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5438 5439
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5440 5441
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5442

H
Haojun Liao 已提交
5443
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5444
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5445

5446
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5447
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5448
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5449 5450 5451 5452
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5453
  while (1) {
H
Haojun Liao 已提交
5454
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5455

H
Haojun Liao 已提交
5456
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5457
      pQInfo->groupIndex = 0;  // always start from 0
5458
      pQuery->rec.rows = 0;
5459
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5460

5461
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5462
    }
5463

5464
    // the offset is handled at prepare stage if no interpolation involved
5465
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5466
      limitResults(pRuntimeEnv);
5467 5468
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5469
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5470
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5471
      numOfFilled = 0;
5472

H
Haojun Liao 已提交
5473
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5474
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5475
        limitResults(pRuntimeEnv);
5476 5477
        break;
      }
5478

5479
      // no result generated yet, continue retrieve data
5480
      pQuery->rec.rows = 0;
5481 5482
    }
  }
5483

5484
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5485
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5486
    pQInfo->groupIndex = 0;
5487
    pQuery->rec.rows = 0;
5488
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5489
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5490 5491 5492
  }
}

5493 5494 5495 5496
/*
 * Entry point for a query on a single table.
 *
 * If results remain from a previous round they are returned first: through the
 * gap-filling step when a fill type is set, otherwise from the window results.
 * Only when nothing is left is a new scan dispatched to the interval,
 * fixed-output or multi-output processor.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (queryHasRemainResForTableQuery(pEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * Some results were not returned yet because of result interpolation,
       * so stay in this procedure instead of launching a retrieve for the next results.
       */
      int32_t filled = 0;
      pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &filled);

      if (pQuery->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // no window results remain after this batch
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  STableQueryInfo *pTable = taosArrayGetP(GET_TABLEGROUP(pQInfo, 0), 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5558
/*
 * Entry point for a query on a super table: picks the multi-table processor
 * or the sequential per-table processor and accumulates the elapsed time
 * into the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  // interval queries, and fixed-output queries that are neither point interpolation
  // nor grouped by a normal column, go to the multi-table processor
  bool multiTable = QUERY_IS_INTERVAL_QUERY(pQuery) ||
                    (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && (!pEnv->groupbyNormalCol));

  if (multiTable) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5579
/*
 * Locate the source column referenced by an expression.
 *
 * Returns:
 *   - TSDB_TBNAME_COLUMN_INDEX for a tag expression whose colId is the table-name column;
 *   - the position in pTagCols for any other tag expression;
 *   - TSDB_UD_COLUMN_INDEX for a user-specified (constant) column;
 *   - the position in pQueryMsg->colList for a normal column.
 * If no entry matches, assert(0) fires; with assertions disabled -1 is returned.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    // search the tag column list
    for (int32_t k = 0; k < pQueryMsg->numOfTags; ++k) {
      if (pTagCols[k].colId == pExprMsg->colInfo.colId) {
        return k;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    // search the normal column list carried by the message
    for (int32_t k = 0; k < pQueryMsg->numOfCols; ++k) {
      if (pQueryMsg->colList[k].colId == pExprMsg->colInfo.colId) {
        return k;
      }
    }
  }

  assert(0);
  return -1;
}

5610 5611 5612
/*
 * Report whether the column index resolved for pExprMsg is below the number of
 * normal columns or below the number of tag columns of the query message.
 * Note that a negative index satisfies this test as well.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t idx = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (idx < pQueryMsg->numOfCols) {
    return true;
  }

  return idx < pQueryMsg->numOfTags;
}

5615
/*
 * Sanity-check the scalar header fields of a query message.
 *
 * Rejected (with an error log) are: a negative interval, a non-positive table
 * count, a negative group-by column count, and an output column count outside
 * the range 1..TSDB_MAX_COLUMNS. Returns true when every check passes.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = true;

  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
    valid = false;
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    valid = false;
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    valid = false;
  } else if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    valid = false;
  }

  return valid;
}

/*
 * Check the source column counts of a query message.
 *
 * Negative counts, or more than TSDB_MAX_COLUMNS columns plus tags, are
 * rejected with an error log. A message carrying no source column at all is
 * accepted only if every output expression is one of:
 *   - TSDB_FUNC_TAGPRJ,
 *   - TSDB_FUNC_TID_TAG on the table-name column,
 *   - TSDB_FUNC_COUNT on the table-name column.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source columns: every output must be computable without column data
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = pExprMsg[i];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed  = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                    (pFunc->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                    (pFunc->functionId == TSDB_FUNC_COUNT && onTbname);

    if (!allowed) {
      return false;
    }
  }

  return true;
}

5661
/*
 * Build the table id list from the packed STableIdInfo records found at pMsg.
 *
 * Each record is byte-swapped in place inside the message buffer (tid with
 * htonl, uid and key with htobe64) and then pushed into a newly created array
 * stored in *pTableIdList.
 *
 * Returns the position in the message right after the last record.
 * NOTE(review): the result of taosArrayInit is not checked here.
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  char *pCursor = pMsg;
  for (int32_t j = 0; j < pQueryMsg->numOfTables; ++j, pCursor += sizeof(STableIdInfo)) {
    STableIdInfo *pEntry = (STableIdInfo *)pCursor;

    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return pCursor;
}
5679

5680
/**
H
hjxilinx 已提交
5681
 * pQueryMsg->head has been converted before this function is called.
5682
 *
H
hjxilinx 已提交
5683
 * @param pQueryMsg
5684 5685 5686 5687
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5688
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5689
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5690 5691
  int32_t code = TSDB_CODE_SUCCESS;

5692 5693 5694 5695
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5696 5697 5698 5699 5700 5701
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5702 5703
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5704

5705 5706
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5707
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5708
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5709 5710

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5711
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5712
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5713 5714 5715
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5716
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5717
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5718
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5719

5720
  // query msg safety check
5721
  if (!validateQueryMsg(pQueryMsg)) {
5722 5723
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5724 5725
  }

H
hjxilinx 已提交
5726 5727
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5728 5729
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5730
    pColInfo->colId = htons(pColInfo->colId);
5731
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5732 5733
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5734

H
hjxilinx 已提交
5735
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5736

H
hjxilinx 已提交
5737
    int32_t numOfFilters = pColInfo->numOfFilters;
5738
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5739
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5740 5741 5742 5743
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5744 5745 5746
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5747
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5748

5749 5750
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5751 5752 5753

      pMsg += sizeof(SColumnFilterInfo);

5754 5755
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5756

5757
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5758 5759 5760 5761 5762
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5763
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5764
        pMsg += (pColFilter->len + 1);
5765
      } else {
5766 5767
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5768 5769
      }

5770 5771
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5772 5773 5774
    }
  }

5775
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5776 5777 5778 5779 5780
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5781
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5782

5783
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5784
    (*pExpr)[i] = pExprMsg;
5785

5786
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5787 5788 5789 5790
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5791

5792
    pMsg += sizeof(SSqlFuncMsg);
5793 5794

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5795
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5796 5797 5798 5799
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5800
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5801 5802 5803 5804 5805
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5806 5807
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5808
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5809 5810
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5811 5812
      }
    } else {
5813
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5814
//        return TSDB_CODE_QRY_INVALID_MSG;
5815
//      }
5816 5817
    }

5818
    pExprMsg = (SSqlFuncMsg *)pMsg;
5819
  }
5820

5821
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5822
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5823
    goto _cleanup;
5824
  }
5825

H
hjxilinx 已提交
5826
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5827

H
hjxilinx 已提交
5828
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5829
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5830 5831 5832 5833
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5834 5835 5836

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5837
      pMsg += sizeof((*groupbyCols)[i].colId);
5838 5839

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5840 5841
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5842
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5843 5844 5845 5846 5847
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5848

H
hjxilinx 已提交
5849 5850
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5851 5852
  }

5853 5854
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5855
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5856 5857

    int64_t *v = (int64_t *)pMsg;
5858
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5859 5860
      v[i] = htobe64(v[i]);
    }
5861

5862
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5863
  }
5864

5865 5866
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5867 5868 5869 5870 5871
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5872 5873
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5874

5875 5876 5877 5878
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5879

5880
      (*tagCols)[i] = *pTagCol;
5881
      pMsg += sizeof(SColumnInfo);
5882
    }
H
hjxilinx 已提交
5883
  }
5884

5885 5886 5887
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5888 5889 5890 5891 5892 5893

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5894 5895 5896
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5897

weixin_48148422's avatar
weixin_48148422 已提交
5898
  if (*pMsg != 0) {
5899
    size_t len = strlen(pMsg) + 1;
5900

5901
    *tbnameCond = malloc(len);
5902 5903 5904 5905 5906
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5907
    strcpy(*tbnameCond, pMsg);
5908
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5909
  }
5910

5911
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5912 5913
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5914
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5915
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5916 5917

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5918 5919

_cleanup:
S
Shengliang Guan 已提交
5920
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5921 5922
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5923 5924 5925 5926
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5927 5928

  return code;
5929 5930
}

H
hjxilinx 已提交
5931
/*
 * Build the expression tree of an arithmetic expression from the binary string
 * held in the first argument of pArithExprInfo->base and store it in
 * pArithExprInfo->pExpr.
 *
 * Returns TSDB_CODE_SUCCESS on success, the code delivered to the CATCH block
 * when exprTreeFromBinary raises, or TSDB_CODE_QRY_APP_ERROR when no tree was
 * produced.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;

  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5952
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5953 5954
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5955
  int32_t code = TSDB_CODE_SUCCESS;
5956

H
Haojun Liao 已提交
5957
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5958
  if (pExprs == NULL) {
5959
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5960 5961 5962 5963 5964
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5965
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5966
    pExprs[i].base = *pExprMsg[i];
5967
    pExprs[i].bytes = 0;
5968 5969 5970 5971

    int16_t type = 0;
    int16_t bytes = 0;

5972
    // parse the arithmetic expression
5973
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5974
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5975

5976
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5977
        taosTFree(pExprs);
5978
        return code;
5979 5980
      }

5981
      type  = TSDB_DATA_TYPE_DOUBLE;
5982
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5983
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5984
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5985
      type = s.type;
H
Haojun Liao 已提交
5986
      bytes = s.bytes;
5987 5988
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5989 5990
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5991 5992
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5993 5994 5995 5996 5997

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5998
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5999
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6000

dengyihao's avatar
dengyihao 已提交
6001
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6002 6003 6004 6005
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6006
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6007

H
Haojun Liao 已提交
6008 6009 6010
        type  = s.type;
        bytes = s.bytes;
      }
6011 6012
    }

S
TD-1057  
Shengliang Guan 已提交
6013
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6014
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6015
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
6016
      taosTFree(pExprs);
6017
      return TSDB_CODE_QRY_INVALID_MSG;
6018 6019
    }

6020
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6021
      tagLen += pExprs[i].bytes;
6022
    }
6023
    assert(isValidDataType(pExprs[i].type));
6024 6025 6026
  }

  // TODO refactor
6027
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6028 6029
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6030

6031
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6032
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6033 6034 6035 6036 6037 6038 6039 6040 6041
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6042 6043 6044
    }
  }

6045
  *pExprInfo = pExprs;
6046 6047 6048
  return TSDB_CODE_SUCCESS;
}

6049
/*
 * Create the group-by expression for a query message.
 *
 * pQueryMsg - converted query message (numOfGroupCols, orderType, orderByIdx are read)
 * pColIndex - array of numOfGroupCols group-by column descriptors; each is
 *             pushed into the new expression's columnInfo array
 * code      - out: set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *             left untouched otherwise
 *
 * Returns the new expression, or NULL when the message has no group-by columns
 * (*code untouched) or when an allocation fails (*code set).
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not hand out an expression without its column list
    taosTFree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6073
/**
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo for every column of pQuery->colList that
 * carries at least one filter, with each filter element bound to its comparison callback.
 *
 * pQuery->numOfFilterCols is incremented once per filtered column, so it is expected to be zero on
 * entry.
 *
 * @param pQInfo  query handle, used only as an identifier in log messages
 * @param pQuery  query whose colList is read and whose numOfFilterCols/pFilterInfo are written
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure, or
 *         TSDB_CODE_QRY_INVALID_MSG when a filter has no usable relation or data type. On failure
 *         whatever was built so far stays attached to pQuery for the caller to release.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the missing array, so that cleanup code iterating over
    // numOfFilterCols never indexes a NULL pFilterInfo
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];
    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: the slot in the range table is chosen by which bounds are inclusive
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          // a one-sided filter must not carry a second relation
          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6166
/**
 * Resolve, for every output expression of the query, the position of its source column.
 *
 * For each entry of pQuery->pSelectExpr (arithmetic expressions are skipped) the column id stored
 * in base.colInfo is looked up in pQuery->colList (normal columns) or pQuery->tagColList (tag
 * columns) and colInfo.colIndex is overwritten with the matching position. Columns whose id is
 * <= TSDB_UD_COLUMN_INDEX are left untouched.
 *
 * @param pQuery  query whose select expressions are updated in place; must not be NULL
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the handle itself before reading a member through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column referenced by an expression must be part of the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is the only tag-side id allowed to be absent from tagColList
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6203 6204
// forward declaration: createQInfoImpl() calls freeQInfo() on its failure path before the definition appears
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6205 6206 6207
/**
 * Choose the capacity (in rows) of the per-column result buffers and the row threshold derived
 * from it, and store both in pQuery->rec.
 *
 * Projection queries get as many rows as fit in the minimum result message size, but never fewer
 * than a fixed minimum; every other query uses a fixed, smaller capacity. The threshold is 85% of
 * the capacity, truncated to an integer.
 *
 * @param pQuery  query whose rowSize is read (projection case) and whose rec.capacity and
 *                rec.threshold are written
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t minMsgBytes    = 1024 * (1024 + 512);  // bytes
  const int32_t minProjRows    = 8192;
  const int32_t nonProjRows    = 4096;  // in case of non-prj query, a smaller output buffer will be used.
  const float   thresholdRatio = 0.85f;

  int32_t rows = nonProjRows;
  if (isProjQuery(pQuery)) {
    rows = minMsgBytes / pQuery->rowSize;
    rows = (rows < minProjRows) ? minProjRows : rows;
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * thresholdRatio);
}

6224 6225
/**
 * Allocate and populate a query handle (SQInfo) together with its SQuery from a decoded query
 * message.
 *
 * Ownership: pGroupbyExpr, pExprs, pTagCols and the contents of *pTableGroupInfo are taken over by
 * this function. On success they are reachable from the returned handle; on failure every one of
 * them has been released before NULL is returned, so the caller must not free them again.
 *
 * @param pQueryMsg        decoded query message (column list, limits, order, fill values, window)
 * @param pGroupbyExpr     group-by descriptor, may be NULL
 * @param pExprs           array of pQueryMsg->numOfOutput output expressions
 * @param pTableGroupInfo  tables to query, grouped; copied by value into the handle
 * @param pTagCols         tag column descriptors
 * @param stableQuery      forwarded to changeExecuteScanOrder()
 * @return the new handle, or NULL on failure (allocation failure or invalid filter information)
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols        = numOfCols;
  pQuery->numOfOutput      = numOfOutput;
  pQuery->limit.limit      = pQueryMsg->limit;
  pQuery->limit.offset     = pQueryMsg->offset;
  pQuery->order.order      = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr      = pExprs;
  pQuery->pGroupbyExpr     = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType         = pQueryMsg->fillType;
  pQuery->numOfTags        = pQueryMsg->numOfTags;
  pQuery->tagColList       = pTagCols;

  // from here on pExprs/pGroupbyExpr/pTagCols are owned by pQuery, so plain "_cleanup" is enough

  // size the array by its own element type rather than by an unrelated struct
  pQuery->colList = calloc(numOfCols, sizeof(*pQuery->colList));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the filters are deep-copied so the query does not alias the message's filter array
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    assert(pExprs[col].interBytes >= pExprs[col].bytes || pExprs[col].interBytes == 0);

    // allocate additional memory for interResults that are usually larger than final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pWindowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  pQInfo->runtimeEnv.pool = initWindowResultPool(getWindowResultSize(&pQInfo->runtimeEnv));

  // one contiguous slab that backs every STableQueryInfo created below
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL || pQInfo->runtimeEnv.keyBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  int32_t index = 0;

  for (int32_t i = 0; i < (int32_t)numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for (int32_t j = 0; j < (int32_t)s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      // carve the next slot out of the slab allocated above
      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // every table starts from its own last key, the end of the window is shared
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // no handle exists yet, so the table group has not been copied anywhere: release it here
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // no SQuery exists, so the expression/tag/group-by objects are not reachable from the handle
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  taosTFree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  taosTFree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6409
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6410 6411 6412 6413
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6414

H
hjxilinx 已提交
6415 6416 6417 6418
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6419
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6420 6421 6422
  return (sig == (uint64_t)pQInfo);
}

6423
/**
 * Finish the initialisation of a freshly created query handle.
 *
 * Sets the timestamp precision, short-circuits queries that cannot produce any result (empty time
 * range for the scan order, or no qualified table), builds the timestamp buffer shipped with the
 * message when there is one, and finally runs doInitQInfo().
 *
 * @param pQueryMsg  decoded query message; the optional ts-comp block is read from it
 * @param tsdb       storage engine handle
 * @param vgId       vgroup id, forwarded to the ts buffer and to doInitQInfo()
 * @param pQInfo     handle to initialise; it is freed here when doInitQInfo() fails
 * @param isSTable   forwarded to doInitQInfo()
 * @return TSDB_CODE_SUCCESS, or the error code reported by doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is built only after the early exits above: they return without handing the
  // buffer to anyone, so creating it earlier would leak it on those paths.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6467
/**
 * Release an array of column filters.
 *
 * For every element flagged as a string filter the buffer referenced by its pz field is freed,
 * then the array itself is freed. Nothing happens when the array is NULL or the count is zero.
 *
 * @param pFilter       filter array, may be NULL
 * @param numOfFilters  number of elements in pFilter
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
    if (pFilter != NULL && numOfFilters != 0) {
      int32_t idx = 0;
      while (idx < numOfFilters) {
        SColumnFilterInfo* pElem = &pFilter[idx];
        if (pElem->filterstr) {
          free((void*)(pElem->pz));
        }
        ++idx;
      }

      free(pFilter);
    }
}

H
Haojun Liao 已提交
6481 6482
/**
 * Tear down the per-table query state grouped in pTableqinfoGroupInfo.
 *
 * Every STableQueryInfo referenced from every group is passed to destroyTableQueryInfoImpl(), the
 * group arrays, the outer group list and the lookup map are destroyed, and the structure is reset
 * to an empty state (NULL list, NULL map, zero tables).
 *
 * @param pTableqinfoGroupInfo  group info to clear; the structure itself is not freed
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pMembers = taosArrayGetP(pGroups, g);
      size_t  memberCount = taosArrayGetSize(pMembers);

      for (int32_t m = 0; m < memberCount; ++m) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pMembers, m);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pMembers);
    }
  }

  taosArrayDestroy(pGroups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
hjxilinx 已提交
6505 6506 6507 6508
/**
 * Release a query handle and everything it owns.
 *
 * Handles that fail isValidQInfo() (NULL, or signature mismatch) are ignored. Otherwise the query
 * buffer quota is returned, the runtime environment is torn down, the SQuery with its result
 * buffers, fill values, filter info, select expressions, group-by descriptor, tag and column lists
 * is freed, followed by the per-table state, the table group and the handle itself. The signature
 * is cleared before the final free so a later isValidQInfo() on the stale pointer does not match.
 *
 * The function tolerates partially constructed handles: every member is checked or is passed to a
 * routine that accepts NULL.
 *
 * @param pQInfo  handle to free, may be NULL
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        taosTFree(pQuery->sdata[col]);
      }
      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // numOfFilterCols may already be set while the array allocation failed, so guard the array
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          taosTFree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo *pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  taosTFree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle before releasing its memory
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6580
/**
 * Compute the number of payload bytes for the current result set.
 *
 * For a ts-comp query with a positive row count the result lives in the file whose path is stored
 * in pQuery->sdata[0]->data: the file size is returned and also written back to *numOfRows (the
 * row size of such a query is 1). For every other case the size is rowSize * (*numOfRows).
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  in: number of rows; out: replaced by the file size for ts-comp queries
 * @return size in bytes, or 0 when the ts-comp file cannot be stat'ed
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // ordinary result: fixed-width rows
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  // ts-comp result: the size of the file is the size of the payload
  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
6601

H
hjxilinx 已提交
6602 6603 6604
/**
 * Copy the current batch of results into the response buffer and update the query bookkeeping.
 *
 * For a ts-comp query the result is the content of the temporary file whose path is stored in
 * pQuery->sdata[0]->data: the whole file is read into data, then the file is closed and unlinked.
 * For every other query doCopyQueryResultToMsg() is used. Afterwards rec.total is advanced by
 * rec.rows and the query is marked QUERY_OVER when the limit has been reached.
 *
 * I/O failures on the ts-comp file are logged but not reported through the return value.
 * TODO return the error code to client.
 *
 * @param pQInfo  query handle
 * @param data    destination buffer; the caller must have sized it for the result
 * @return always TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the result signed: lseek() reports failure as -1
      int64_t fileSize = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, fileSize);
      if (fileSize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to seek in ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else {
        // read() may legitimately return fewer bytes than requested: loop until the file is drained
        int64_t numOfRead = 0;
        while (numOfRead < fileSize) {
          int64_t  remain = fileSize - numOfRead;
          uint32_t chunk  = (uint32_t)((remain > INT32_MAX) ? INT32_MAX : remain);

          int64_t ret = (int64_t)read(fd, data + numOfRead, chunk);
          if (ret < 0 && errno == EINTR) {
            continue;
          }

          if (ret <= 0) {  // error, or the file shrank underneath us
            break;
          }

          numOfRead += ret;
        }

        if (numOfRead < fileSize) {
          qError("QInfo:%p failed to read ts-comp file, path:%s, read:%" PRId64 ", expect:%" PRId64 ", reason:%s", pQInfo,
                 pQuery->sdata[0]->data, numOfRead, fileSize, strerror(errno));
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6654 6655 6656 6657 6658 6659 6660
// Bookkeeping for the query handles of one owner.
// NOTE(review): presumably one instance exists per vnode (see vgId) - confirm against the code that creates it.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vgroup this manager serves - TODO confirm
  bool            closed;         // NOTE(review): looks like a "no new handles accepted" flag - confirm with users of the struct
  pthread_mutex_t lock;           // mutex stored with the manager; which fields it guards is not visible here
} SQueryMgmt;

6661
/**
 * Create and initialise a query handle from a raw query message.
 *
 * The message is decoded, the output expressions and the group-by descriptor are built, the set of
 * tables to query is resolved (single table, super table by tag condition, or explicit id list),
 * the query buffer quota is checked, and finally the handle is created and initialised.
 *
 * @param tsdb       storage engine handle, must not be NULL
 * @param vgId       vgroup id
 * @param pQueryMsg  query message, must not be NULL; the filters of its column list are released
 *                   before returning
 * @param pQInfo     out: the new handle on success, NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    // the query type comes from the message: reject an unknown value instead of aborting the
    // process (debug build) or silently running with an empty table group (release build)
    qError("qmsg:%p invalid query type:%d", pQueryMsg, (int32_t)pQueryMsg->queryType);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    // NOTE(review): tableGroupInfo obtained above is not released on this path - confirm whether
    // it has to be destroyed here.
    goto _over;
  }

  // createQInfoImpl takes over pGroupbyExpr, pExprs, pTagColumnInfo and the table group, and
  // releases them itself when it fails; drop the local references so _over does not free them again
  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);
  free(pExprs);
  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // pQInfo already freed in initQInfo, but *pQInfo may not point to NULL
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6792
/**
 * Destroy a query handle: print its cost summary and release it.
 *
 * Handles that fail isValidQInfo() (NULL, or signature mismatch) are ignored.
 *
 * @param qHandle  query handle, may be NULL
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pHandle = (SQInfo*) qHandle;

  if (isValidQInfo(pHandle)) {
    qDebug("QInfo:%p query completed", pHandle);
    queryCostStatis(pHandle);   // print the query cost summary
    freeQInfo(pHandle);
  }
}

6803 6804 6805 6806 6807 6808 6809 6810
/**
 * Mark the result of the current execution round as ready and give up ownership of the handle.
 *
 * Under pQInfo->lock the handle is flagged QUERY_RESULT_READY, it is recorded whether a response
 * context is attached, and the owner is reset to 0 (the caller must be the current owner). After
 * the lock is dropped the ready semaphore is posted.
 *
 * @param pQInfo  query handle owned by the calling thread
 * @return true when a response context was attached to the handle at the time of the check
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // clear qhandle owner, it must be in the secure area. other thread may run ahead before current, after it is
  // put into task to be executed.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6822
/**
 * Run one execution round of the query behind `qinfo`.
 *
 * The calling thread first claims the handle by atomically swapping its
 * thread id into pQInfo->owner. If another thread already owns the handle,
 * the code is set to TSDB_CODE_QRY_IN_EXEC and false is returned. Every other
 * exit goes through doBuildResCheck(), which clears the owner again.
 *
 * A non-zero value delivered to the setjmp() site below (via
 * pQInfo->runtimeEnv.env) is stored in pQInfo->code as the error code.
 *
 * @return false if the handle is being executed by another thread, otherwise
 *         the result of doBuildResCheck().
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  // try to become the owner of this handle
  int64_t self      = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  // assigned before setjmp() and never modified afterwards
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t jmpCode = setjmp(pEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // dispatch to the executor matching the query kind
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6878
/**
 * Wait until the current execution round of a query has produced its result.
 *
 * Blocks on the pQInfo->ready semaphore, then reports the query code.
 *
 * @param qinfo        query handle
 * @param buildRes     [out] set to false on entry for a valid handle; set to
 *                     true once the semaphore has been acquired
 * @param pRspContext  not used by this implementation; kept so the signature
 *                     stays unchanged for callers
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle (in which
 *         case *buildRes is not written), otherwise pQInfo->code.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  (void)pRspContext;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // wait for the executing thread to post the result (see tsem_post on pQInfo->ready)
  tsem_wait(&pQInfo->ready);
  *buildRes = true;

  return pQInfo->code;
}
6921

6922
/**
 * Build the retrieve response message for the current result set.
 *
 * Allocates the response with rpcMallocCont(), fills the header fields in
 * network byte order, and dumps the rows when there are any and the query has
 * not failed. Otherwise the query status is set to QUERY_OVER.
 *
 * @param qinfo         query handle
 * @param pRsp          [out] receives the allocated response (NULL on allocation failure)
 * @param contLen       [out] total length of the response message
 * @param continueExec  [out] false when the query is killed or over, true otherwise
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or pQInfo->code.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pEnv   = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  // payload = result rows + an int32_t + one STableIdInfo per entry in arrTableIdInfo
  size_t payload = getResultSize(pQInfo, &pQuery->rec.rows);
  payload += sizeof(int32_t);
  payload += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payload + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  SRetrieveTableRsp* rsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  *pRsp = rsp;
  if (rsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    rsp->offset = htobe64(pQuery->limit.offset);
  } else {
    rsp->offset = 0;
  }
  rsp->useconds  = htobe64(pEnv->summary.elapsedTime);
  rsp->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    *continueExec  = false;
    rsp->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6975

6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986
/**
 * Tell whether a query has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise
 *         1 when the query is killed or its status contains QUERY_OVER, else 0.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQInfo->runtimeEnv.pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6987
/**
 * Mark a query as killed and wait until no thread owns the handle any more.
 *
 * The wait polls pQInfo->owner every 100 ms; the owner is cleared by the
 * executing thread once it stops.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_SUCCESS otherwise.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread to stop.
  // Once the query is stopped, the owner of qHandle will be cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }
    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020
/**
 * Write one tag value into a result buffer slot.
 *
 * @param output  destination slot
 * @param val     source value, or NULL to store the type's null representation
 * @param type    TSDB data type of the value
 * @param bytes   width of the value; used for non BINARY/NCHAR types only
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    // variable-length value: copy the length given by varDataTLen()
    memcpy(output, val, varDataTLen(val));
  } else {
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7021 7022 7023
/**
 * Produce the result of a tag-only query directly from table meta data.
 *
 * Three cases are handled, selected by the function id of the first output
 * expression:
 *   - TSDB_FUNC_TID_TAG: one var-data row per table holding
 *     [int16 0][uid][tid][vgId][tag value | table name];
 *   - TSDB_FUNC_COUNT:   a single int64 row with the number of tables
 *     ("count(tbname)");
 *   - otherwise:         one row per table with the requested tag values
 *     and/or table name, honouring limit/offset.
 *
 * pQInfo->tableIndex is advanced for every table consumed. On exit
 * pQuery->rec.rows holds the number of rows produced and the query status is
 * set to QUERY_COMPLETED. Nothing is done when there is no table group.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // use the width/type declared in the tag column list when the column is found there
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The fields below sit at offsets with no alignment guarantee, so they
      // are serialised with memcpy instead of stores through casted pointers.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // at most one buffer worth of rows, further capped by a non-negative LIMIT
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        char *data = NULL, *dst = NULL;
        int16_t type = 0, bytes = 0;

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7154
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7155 7156 7157 7158
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7159
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7160 7161
}

H
Haojun Liao 已提交
7162
/**
 * Reserve query buffer for `numOfTables` tables from the global budget
 * tsQueryBufferSize.
 *
 *   - budget < 0 : no accounting is done, always succeeds;
 *   - budget == 0: query processing is disabled;
 *   - budget > 0 : the estimate is subtracted with a compare-and-swap retry loop.
 *
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER when the
 *         remaining budget is too small (or zero).
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  if (tsQueryBufferSize <= 0) {
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  for (;;) {
    int64_t current = tsQueryBufferSize;
    int64_t left    = current - required;
    if (left < 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry when another thread changed the budget in between
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, current, left) == current) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

H
Haojun Liao 已提交
7185
/**
 * Give the buffer estimate for `numOfTables` tables back to the global budget
 * tsQueryBufferSize. Nothing is done when the budget is zero or negative.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize > 0) {
    int64_t released = getQuerySupportBufSize(numOfTables);

    // add the released amount back atomically
    atomic_add_fetch_64(&tsQueryBufferSize, released);
  }
}

7196 7197 7198 7199 7200 7201 7202
/**
 * Return the response context currently attached to the query handle.
 * The handle must not be NULL (asserted).
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;

  assert(pInfo != NULL);
  return pInfo->rspContext;
}

7203 7204 7205 7206 7207 7208 7209
/**
 * Free callback passed to taosCacheInit() for the query handle cache:
 * `qhandle` points to the stored handle. The query is killed first and then
 * destroyed. A NULL slot or a NULL stored handle is ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** ppInfo = qhandle;

  if (ppInfo != NULL && *ppInfo != NULL) {
    qKillQuery(*ppInfo);
    qDestroyQueryInfo(*ppInfo);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7214
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7215 7216 7217 7218

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7219
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7220 7221 7222 7223
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7224

S
TD-1530  
Shengliang Guan 已提交
7225
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7226 7227 7228 7229
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7230 7231

  qDebug("vgId:%d, open querymgmt success", vgId);
7232
  return pQueryMgmt;
7233 7234
}

H
Haojun Liao 已提交
7235
/**
 * Callback handed to taosCacheRefresh(): `handle` points to a stored query
 * handle, which is killed via qKillQuery().
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** ppInfo = (void**)handle;
  qKillQuery(*ppInfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7241 7242 7243 7244 7245 7246 7247
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7248
//  pthread_mutex_lock(&pQueryMgmt->lock);
7249
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7250
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7251

H
Haojun Liao 已提交
7252
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269
}

/**
 * Destroy a query manager that has already been marked closed (asserted):
 * the handle cache is detached and cleaned up, the mutex destroyed and the
 * manager freed. A NULL manager is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  assert(pMgmt->closed);

  // remember the id for the final log line; pMgmt is gone by then
  const int32_t vgId = pMgmt->vgId;

  // detach the pool from the manager before cleaning it up
  SCacheObj* pool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pool);

  pthread_mutex_destroy(&pMgmt->lock);
  taosTFree(pMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

7275
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7276
  if (pMgmt == NULL) {
7277
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7278 7279 7280
    return NULL;
  }

7281
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7282

7283 7284
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7285
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7286
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7287 7288 7289
    return NULL;
  }

H
Haojun Liao 已提交
7290
//  pthread_mutex_lock(&pQueryMgmt->lock);
7291
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7292
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7293
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7294
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7295 7296
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7297 7298
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7299
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7300 7301 7302 7303 7304

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7305
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7306 7307
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7308 7309 7310 7311 7312 7313 7314
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7315 7316 7317
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7318 7319
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7320
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7321
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7322 7323 7324 7325 7326 7327
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7328
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7329 7330 7331 7332 7333
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7334
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7335 7336 7337
  return 0;
}

7338