qExecutor.c 276.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30
#include "ttype.h"
31

H
Haojun Liao 已提交
32
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
33 34 35 36 37

/**
 * check if the primary column is loaded by default; otherwise, the program is
 * forced to load the primary column explicitly.
 */
H
Haojun Liao 已提交
38
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0u)
39 40
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

41
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
42
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
43
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
44
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
45

H
Haojun Liao 已提交
46
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
47

48
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
49
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
50

H
Haojun Liao 已提交
51 52
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
53
#define TIME_WINDOW_COPY(_dst, _src)  do {\
H
Haojun Liao 已提交
54 55
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
H
Haojun Liao 已提交
56
} while (0)
S
TD-1057  
Shengliang Guan 已提交
57

58
enum {
H
hjxilinx 已提交
59
  // when the query starts to execute, this status will be set
60 61
  QUERY_NOT_COMPLETED = 0x1u,

H
hjxilinx 已提交
62 63
  /* result output buffer is full, current query is paused.
   * this status only exists in group-by clause and diff/add/division/multiply/ query.
64
   */
65 66
  QUERY_RESBUF_FULL = 0x2u,

H
hjxilinx 已提交
67 68 69
  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window, it is also denoted as query_completed
70
   */
71
  QUERY_COMPLETED = 0x4u,
72

H
hjxilinx 已提交
73 74
  /* when the result is not completely returned to the client, this status will
   * usually be used in the case of an interval query with the interpolation option
75
   */
76
  QUERY_OVER = 0x8u,
77
};
78 79

enum {
80 81
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
82 83 84
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

85
typedef struct {
86 87 88 89 90 91
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
92 93
} SQueryStatusInfo;

H
Haojun Liao 已提交
94
#if 0
/*
 * Fault-injection allocator shims (compiled out by this #if 0).
 * When enabled, malloc/calloc/realloc are redirected to wrappers that return NULL
 * for a fraction of the calls, selected with rand():
 *   - u_malloc / u_calloc fail when rand() % 1000 == 0
 *   - u_realloc fails when rand() % 5 is 0 or 1
 */
static UNUSED_FUNC void *u_malloc(size_t size) {
  uint32_t dice = rand();
  return (dice % 1000 == 0) ? NULL : malloc(size);
}

static UNUSED_FUNC void *u_calloc(size_t num, size_t size) {
  uint32_t dice = rand();
  return (dice % 1000 == 0) ? NULL : calloc(num, size);
}

static UNUSED_FUNC void *u_realloc(void *p, size_t size) {
  uint32_t dice = rand();
  return (dice % 5 <= 1) ? NULL : realloc(p, size);
}

// the redirections must come after the wrappers, which call the real allocators
#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
127

128
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))
131
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
H
Haojun Liao 已提交
132

133
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
134
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
135

136 137 138
/* Returns twice the value of tsShellActivityTimer (seconds, going by the function name). */
static int32_t getMaximumIdleDurationSec() {
  const int32_t idleFactor = 2;
  return tsShellActivityTimer * idleFactor;
}
139

140 141
/*
 * Move the time window *tw to the next window in the scan direction of pQuery.
 *
 * For every interval unit other than 'n' and 'y' the start key is shifted by
 * interval.sliding (negated for the backward direction) and the end key becomes
 * skey + interval - 1.
 *
 * For the units 'n' and 'y' (a 'y' interval is handled as 12 units of 'n') the start
 * key is reduced to seconds, broken down with localtime_r(), shifted by the interval
 * expressed in months and rebuilt with mktime(). The end key is one tick before the
 * start of the following window.
 *
 * @param pQuery  query descriptor; order, interval and precision are read
 * @param tw      in: current window, out: next window
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // divide by 1000 once, and once more for microsecond precision, to obtain seconds
  int64_t key = tw->skey / 1000;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }

  int64_t interval = pQuery->interval.interval;
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t    t = (time_t)key;
  localtime_r(&t, &tm);

  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before scaling: time_t and long may both be 32 bits wide, in which case
  // the multiplication would overflow before being stored into the 64-bit key
  tw->skey = (int64_t)mktime(&tm) * 1000;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000;
    tw->ekey *= 1000;
  }

  // the end key is inclusive: one tick before the next window starts
  tw->ekey -= 1;
}

177 178
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
179

H
hjxilinx 已提交
180
// todo move to utility
181
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
182

183
static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
184
static void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
185
static void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow);
186
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
187

188
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
189
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
190

191
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
192
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
193
static void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
194
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
195
static void buildTagQueryResult(SQInfo *pQInfo);
196

197
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
198
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
199 200
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
201
static int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order);
H
Haojun Liao 已提交
202
static void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey, int32_t type);
H
Haojun Liao 已提交
203
static STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win);
204
static STableIdInfo createTableIdInfo(SQuery* pQuery);
205

206
/*
 * Evaluate the column filters of pQuery against the element at position elemPos.
 *
 * Filters of different columns are AND-ed; the filters of one column are OR-ed, so a
 * column qualifies as soon as one of its filters accepts the value. A column without
 * any accepting filter (including a column with zero filters) rejects the element.
 *
 * NULL handling per filter:
 *   - NULL value:     only isNull_filter accepts it, every other filter is skipped
 *   - non-NULL value: notNull_filter accepts it, isNull_filter is skipped, any other
 *                     filter is decided by calling its fp callback
 *
 * @return true when every filtered column accepts the element, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pInfo = &pQuery->pFilterInfo[col];
    char *pVal = (char *)pInfo->pData + pInfo->info.bytes * elemPos;

    bool matched = false;
    for (int32_t f = 0; f < pInfo->numOfFilters && !matched; ++f) {
      SColumnFilterElem *pFilter = &pInfo->pFilters[f];

      if (isNull(pVal, pInfo->info.type)) {
        matched = (pFilter->fp == isNull_filter);
      } else if (pFilter->fp == notNull_filter) {
        matched = true;
      } else if (pFilter->fp != isNull_filter) {
        // the same element is passed as both value arguments of the callback
        matched = pFilter->fp(pFilter, pVal, pVal);
      }
    }

    if (!matched) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output columns of the query.
 *
 * When hasMainOutput() reports a main output, the ts, tag and tagprj columns are
 * ignored: they cannot decide the number of results, the main output does.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  const bool mainOutputExists = hasMainOutput(pQuery);

  int64_t numOfRes = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

    if (!(mainOutputExists && auxiliary)) {
      SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
      if (pCell != NULL && numOfRes < pCell->numOfRes) {
        numOfRes = pCell->numOfRes;
      }
    }
  }

  assert(numOfRes >= 0);
  return numOfRes;
}

274 275 276 277 278
/*
 * The number of results needs to be updated because the offset value was updated.
 *
 * Overwrites numOfRes of every output column with the given value, except for the
 * ts, tag, tagprj and ts_dummy columns. Each updated column is asserted to currently
 * hold more than numOfRes results.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    int16_t             fid = pRuntimeEnv->pCtx[i].functionId;

    bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ) ||
                   (fid == TSDB_FUNC_TS_DUMMY);
    if (!skipped) {
      assert(pCell->numOfRes > numOfRes);
      pCell->numOfRes = numOfRes;
    }
  }
}

H
Haojun Liao 已提交
294 295
/* Map a group index to its merge-result group id: 50000000 + groupIndex * 10000. */
static int32_t getMergeResultGroupId(int32_t groupIndex) {
  const int32_t baseId = 50000000;
  const int32_t stride = 10000;
  return baseId + (groupIndex * stride);
}

/*
 * Tell whether the group-by expression contains a normal (non-tag) column.
 *
 * Returns false for a NULL expression or one without group columns. When more than
 * one group column exists, the normal column is asserted to have colIndex > 0, i.e.
 * it must not sit at the first position (tbname may precede it in the group by clause).
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, k);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Look up the data type of the first normal column of the group-by expression.
 *
 * The column id of the first normal group column is searched in pQuery->colList.
 * TSDB_DATA_TYPE_NULL is returned when there is no normal group column or when its
 * id is not present in the column list.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 stands for "no normal group-by column found"
  int32_t groupColId = -2;
  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, k);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  for (int32_t k = 0; k < pQuery->numOfCols; ++k) {
    if (pQuery->colList[k].colId == groupColId) {
      return pQuery->colList[k].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * Return true when the query outputs at least one tag_dummy/ts_dummy column together
 * with at least one function flagged TSDB_FUNCSTATE_SELECTIVITY in aAggs.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    tagsPresent = false;
  int32_t selectivityCount = 0;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pExpr1[k].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      tagsPresent = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0 && tagsPresent);
}

366 367
/* Return true when every output column is a prj or tagprj function (also true for zero outputs). */
bool isProjQuery(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pExpr1[k].base.functionId;
    bool    projection = (fid == TSDB_FUNC_PRJ || fid == TSDB_FUNC_TAGPRJ);
    if (!projection) {
      return false;
    }
  }

  return true;
}

H
Haojun Liao 已提交
377
/* Return true when the first output expression of the query is the ts_comp function. */
bool isTSCompQuery(SQuery *pQuery) {
  return pQuery->pExpr1[0].base.functionId == TSDB_FUNC_TS_COMP;
}
378

379 380 381
/*
 * Apply the LIMIT of the query to the rows produced in the current round.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is cut
 * down to the remaining quota, the query is marked QUERY_COMPLETED and true is
 * returned. Otherwise nothing is changed and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool exceeded = (pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit);
  if (!exceeded) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* Return true when any output column of the query is a top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pExpr1[k].base.functionId;

    // ts columns never decide the answer
    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

411 412 413 414 415 416 417 418 419 420 421
/* Return true when at least one output column of the query uses the twa function. */
static bool timeWindowInterpoRequired(SQuery *pQuery) {
  bool required = false;

  for (int32_t k = 0; k < pQuery->numOfOutput && !required; ++k) {
    required = (pQuery->pExpr1[k].base.functionId == TSDB_FUNC_TWA);
  }

  return required;
}

H
Haojun Liao 已提交
422
/*
 * Return true when the query needs tag values in its output: either it consists of a
 * single ts_comp column (the tag value is required for the join filter), or any of
 * its output columns refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  if (pQuery->numOfOutput == 1 && pQuery->pExpr1[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    if (TSDB_COL_IS_TAG(pQuery->pExpr1[k].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

440 441 442 443 444 445 446 447
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
448
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
449
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
450 451
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
452 453
  } else {
    *pColStatis = NULL;
454
  }
455

H
Haojun Liao 已提交
456
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
457 458 459
    return false;
  }

460 461 462
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
463

464 465 466
  return true;
}

H
Haojun Liao 已提交
467
/*
 * Find, or create during the master scan, the result row that belongs to a key.
 *
 * The lookup key is built into pRuntimeEnv->keyBuf from pData/bytes/uid and searched in
 * pRuntimeEnv->pResultRowHashTable, which maps the key to the slot index of the row in
 * pResultRowInfo->pResult. On a hit curIndex is set to the stored slot. On a miss a new
 * row is taken from the row pool, appended to pResult (growing the pointer array when it
 * is full) and registered in the hash table.
 *
 * @param pRuntimeEnv     runtime environment (key buffer, hash table, row pool, jmp env)
 * @param pResultRowInfo  result row container to search in / append to
 * @param pData           key bytes
 * @param bytes           length of the key in pData
 * @param masterscan      only the master scan is allowed to add new rows
 * @param uid             id combined with the key bytes to form the hash key
 * @return the result row at curIndex, or NULL when the key is unknown and this is not
 *         the master scan
 *
 * Does not return on failure: longjmp(pRuntimeEnv->env, ...) is taken with
 * TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails and with
 * TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when more than MAX_INTERVAL_TIME_WINDOW rows exist.
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);
  int32_t *p1 =
      (int32_t *)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
  if (p1 != NULL) {
    pResultRowInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // TODO refactor
    // more than the capacity, reallocate the resources
    if (pResultRowInfo->size >= pResultRowInfo->capacity) {
      int64_t newCapacity = 0;
      if (pResultRowInfo->capacity > 10000) {
        newCapacity = (int64_t)(pResultRowInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pResultRowInfo->capacity * 1.5);
      }

      // the growth factor truncates to "no growth" for a capacity of 0 or 1; make sure
      // the slot written below (index == size) always lies inside the new array
      if (newCapacity <= pResultRowInfo->size) {
        newCapacity = (int64_t)pResultRowInfo->size + 1;
      }

      char *t = realloc(pResultRowInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pResultRowInfo->pResult = (SResultRow **)t;

      // clear the newly added slots
      int32_t inc = (int32_t)newCapacity - pResultRowInfo->capacity;
      memset(&pResultRowInfo->pResult[pResultRowInfo->capacity], 0, POINTER_BYTES * inc);

      pResultRowInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = getNewResultRow(pRuntimeEnv->pool);
    pResultRowInfo->pResult[pResultRowInfo->size] = pResult;
    int32_t ret = initResultRow(pResult);
    if (ret != TSDB_CODE_SUCCESS) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // add a new result set for a new group
    pResultRowInfo->curIndex = pResultRowInfo->size++;
    taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes),
                (char *)&pResultRowInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pResultRowInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pResultRowInfo, pResultRowInfo->curIndex);
}

// get the correct time window according to the handled timestamp
H
Haojun Liao 已提交
524
static STimeWindow getActiveTimeWindow(SResultRowInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
525
  STimeWindow w = {0};
526

527
 if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
528
    w.skey = pWindowResInfo->prevSKey;
529 530
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision);
531
    } else {
532
      w.ekey = w.skey + pQuery->interval.interval - 1;
533
    }
534
  } else {
535
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
536
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
537
    w = pWindowRes->win;
538
  }
539

540
  if (w.skey > ts || w.ekey < ts) {
541 542 543
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
544 545
    } else {
      int64_t st = w.skey;
546

547
      if (st > ts) {
548
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
549
      }
550

551
      int64_t et = st + pQuery->interval.interval - 1;
552
      if (et < ts) {
553
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
554
      }
555

556
      w.skey = st;
557
      w.ekey = w.skey + pQuery->interval.interval - 1;
558
    }
559
  }
560

561 562 563 564 565 566 567
  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }
568

569 570 571
  return w;
}

H
Haojun Liao 已提交
572
/*
 * Assign a position (page id + row id) in the disk-based result buffer to a result row
 * that has none yet.
 *
 * The last page registered for tid is reused while it holds fewer than numOfRowsPerPage
 * rows; otherwise (or when no page exists for tid) a new page is requested. The row
 * takes the next free row slot of the chosen page.
 *
 * @return 0 on success or when the row already has a page, -1 when no page is available
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  tFilePage *pPage = NULL;
  int32_t    pageId = -1;
  SIDList    pageIds = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(pageIds) == 0) {
    pPage = getNewDataBuf(pResultBuf, tid, &pageId);
  } else {
    SPageInfo *pLast = getLastPageInfo(pageIds);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, tid, &pageId);
      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->rowId = (int32_t)(pPage->num++);

    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

H
Haojun Liao 已提交
617
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, SDataBlockInfo* pBockInfo,
618
                                       STimeWindow *win, bool masterscan, bool* newWind, SResultRow** pResult) {
619 620
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
621

H
Haojun Liao 已提交
622 623
  // todo refactor
  int64_t uid = getResultInfoUId(pRuntimeEnv);
H
Haojun Liao 已提交
624
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pResultRowInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, uid);
H
Haojun Liao 已提交
625
  if (pResultRow == NULL) {
626
    *newWind = false;
627
    return masterscan? -1:0;   // no master scan, no result generated means error occurs
628
  }
629

630
  *newWind = true;
H
Haojun Liao 已提交
631

632
  // not assign result buffer yet, add new result buffer
H
Haojun Liao 已提交
633 634
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, pBockInfo->tid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
635
    if (ret != TSDB_CODE_SUCCESS) {
636 637 638
      return -1;
    }
  }
639

640
  // set time window for current result
H
Haojun Liao 已提交
641
  pResultRow->win = (*win);
642
  *pResult = pResultRow;
H
Haojun Liao 已提交
643
  setResultRowOutputBufInitCtx(pRuntimeEnv, pResultRow);
644

645 646 647
  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
648
/* Return the closed flag of the result row stored at the given slot (slot must be in [0, size)). */
static bool getResultRowStatus(SResultRowInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SResultRow *pRow = pWindowResInfo->pResult[slot];
  return pRow->closed;
}

653 654 655 656 657 658 659 660 661 662 663 664 665 666
// Selects which interpolation flag of a result row is addressed by
// setResultRowInterpo() / resultRowInterpolated().
typedef enum SResultTsInterpType {
  RESULT_ROW_START_INTERP = 1,  // refers to SResultRow.startInterp
  RESULT_ROW_END_INTERP   = 2,  // refers to SResultRow.endInterp
} SResultTsInterpType;

/* Set the start or the end interpolation flag of a result row, as selected by type. */
static void setResultRowInterpo(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  switch (type) {
    case RESULT_ROW_START_INTERP:
      pResult->startInterp = true;
      break;
    default:
      pResult->endInterp = true;
      break;
  }
}

H
Haojun Liao 已提交
667
/* Return whether the start or the end interpolation flag (selected by type) of a result row is set. */
static bool resultRowInterpolated(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  return (type == RESULT_ROW_START_INTERP) ? (pResult->startInterp == true) : (pResult->endInterp == true);
}

H
Haojun Liao 已提交
676
/*
 * Count how many rows, starting at pos and moving in the scan direction, are covered
 * by a window ending at ekey.
 *
 * searchFn is run over the part of the timestamp column that is still ahead of pos
 * ([pos, numOfRows) for ascending order, [0, pos] for descending order). The distance
 * between pos and the returned index is the step count; one more step is added when
 * the timestamp found at that index equals ekey. The result is asserted to be positive.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order == TSDB_ORDER_ASC) {
    // the returned index is relative to pos
    int32_t found = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (found >= 0) {
      steps = (pData[found + pos] == ekey) ? found + 1 : found;
    }
  } else {
    // the returned index is relative to the start of the column
    int32_t found = searchFn((char *)pData, pos + 1, ekey, order);
    if (found >= 0) {
      steps = (pData[found] == ekey) ? (pos - found) + 1 : (pos - found);
    }
  }

  assert(steps > 0);
  return steps;
}

H
Haojun Liao 已提交
704
/*
 * Close every result row whose window lies completely behind lastKey and move curIndex
 * to the first row that is still open.
 *
 * Rows are visited in slot order. Already closed rows are counted and skipped. An open
 * row is closed when its window was passed by the scan (ekey <= lastKey for ascending,
 * skey >= lastKey for descending queries); the first open row that was not passed stops
 * the walk and becomes the current row, and its skey is stored in prevSKey. When no such
 * row exists, curIndex is set to the last slot.
 *
 * @return the number of rows that were already closed before this call reached them
 *         (rows closed by this call are not included)
 */
static int32_t updateResultRowCurrentIndex(SResultRowInfo* pWindowResInfo, TSKEY lastKey, bool ascQuery) {
  int32_t numOfClosed = 0;
  int64_t firstOpenSKey = TSKEY_INITIAL_VAL;

  int32_t slot = 0;
  while (slot < pWindowResInfo->size) {
    SResultRow *pRow = pWindowResInfo->pResult[slot];

    if (pRow->closed) {
      numOfClosed += 1;
      slot++;
      continue;
    }

    bool passed = ascQuery ? (pRow->win.ekey <= lastKey) : (pRow->win.skey >= lastKey);
    if (!passed) {
      firstOpenSKey = pRow->win.skey;
      break;
    }

    closeResultRow(pWindowResInfo, slot);
    slot++;
  }

  if (firstOpenSKey == TSKEY_INITIAL_VAL) {
    // all windows are closed, set the last one to be the skey
    assert(slot == pWindowResInfo->size);
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
  } else {
    pWindowResInfo->curIndex = slot;
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex]->win.skey;
  }

  return numOfClosed;
}

737 738 739
/**
 * NOTE: the query status only set for the first scan of master scan.
 */
H
Haojun Liao 已提交
740
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SResultRowInfo *pWindowResInfo) {
741
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
742
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || pWindowResInfo->size == 0) {
743
    return pWindowResInfo->size;
744
  }
745

746
  // no qualified results exist, abort check
747
  int32_t numOfClosed = 0;
H
Haojun Liao 已提交
748
  bool ascQuery = QUERY_IS_ASC_QUERY(pQuery);
749

750
  // query completed
H
Haojun Liao 已提交
751
  if ((lastKey >= pQuery->current->win.ekey && ascQuery) || (lastKey <= pQuery->current->win.ekey && (!ascQuery))) {
H
Haojun Liao 已提交
752
    closeAllResultRows(pWindowResInfo);
753

754 755 756
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
H
Haojun Liao 已提交
757
    numOfClosed = updateResultRowCurrentIndex(pWindowResInfo, lastKey, ascQuery);
758

759
    // the number of completed slots are larger than the threshold, return current generated results to client.
H
Haojun Liao 已提交
760
    if (numOfClosed > pQuery->rec.threshold) {
761
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
762
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
763

764
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
765
    } else {
766
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
767
             numOfClosed);
768 769
    }
  }
770

771 772 773 774 775
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
776

777
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
778
  return numOfClosed;
779 780 781
}

/*
 * Count the rows of the data block that belong to the time window ending at ekey,
 * starting at startPos and moving in the scan direction of the query.
 *
 * When the window ends inside the block, the count is obtained from
 * getForwardStepsInBlock(); otherwise all remaining rows of the block are included.
 * With updateLastKey set, the lastKey of the current table is moved one step past the
 * last included timestamp (or past the block boundary when the rest of the block is
 * included).
 *
 * @return number of rows in the window, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);
  bool    asc   = QUERY_IS_ASC_QUERY(pQuery);

  STableQueryInfo* pTableInfo = pQuery->current;

  int32_t num = -1;
  bool    endsInsideBlock = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  if (endsInsideBlock) {
    num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    if (updateLastKey) {  // update the last key
      int32_t lastPos = asc ? startPos + (num - 1) : startPos - (num - 1);
      pTableInfo->lastKey = pPrimaryColumn[lastPos] + step;
    }
  } else {
    // the window covers the rest of the block
    num = asc ? (pDataBlockInfo->rows - startPos) : (startPos + 1);
    if (updateLastKey) {
      TSKEY border = asc ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
      pTableInfo->lastKey = border + step;
    }
  }

  assert(num > 0);
  return num;
}

H
Haojun Liao 已提交
821 822
/**
 * Run the block-wise implementation (xFunction) of every output expression over one slice
 * of the current data block.
 *
 * Nothing is executed unless this is the master scan or the target window is closed.
 *
 * @param pRuntimeEnv  runtime environment holding the query and the function contexts
 * @param closed       status of the result row the functions write to
 * @param pWin         time window of the slice; its skey becomes nStartQueryTimestamp
 * @param offset       row where the slice starts in scan direction
 * @param forwardStep  number of rows in the slice
 * @param tsCol        timestamp column of the block, used for selectivity functions
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    /*
     * Remember the flag of THIS context. Restoring every context from pCtx[0] would mark
     * contexts without pre-aggregated statistics as having them whenever pCtx[0] has some.
     */
    bool hasPrev = pCtx[k].preAggVals.isSet;

    pCtx[k].nStartQueryTimestamp = pWin->skey;
    pCtx[k].size = forwardStep;
    // for descending scans the slice ends at offset, so its lowest row index is offset - (forwardStep - 1)
    pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

    int32_t functionId = pQuery->pExpr1[k].base.functionId;
    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    // NOTE: the original value of isSet is changed here and put back below
    if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
      pCtx[k].preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
      aAggs[functionId].xFunction(&pCtx[k]);
    }

    // restore it
    pCtx[k].preAggVals.isSet = hasPrev;
  }
}

855
/**
 * Run the row-wise implementation (xFunctionF) of every output expression for a single row.
 *
 * Nothing is executed unless this is the master scan or the target window is closed.
 *
 * @param pRuntimeEnv  runtime environment holding the query and the function contexts
 * @param closed       status of the result row the functions write to
 * @param pWin         time window of the row; its skey becomes nStartQueryTimestamp
 * @param offset       row index handed to each function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pCtxList[i];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t funcId = pQuery->pExpr1[i].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pFuncCtx, funcId)) {
      continue;
    }

    aAggs[funcId].xFunctionF(pFuncCtx, offset);
  }
}

H
Haojun Liao 已提交
871 872
/**
 * Advance pNext to the following time window and locate its first row in the current block.
 *
 * If the located row already lies beyond the advanced window (the window covers no data),
 * pNext is moved further so that it covers that row.
 *
 * @param pRuntimeEnv     runtime environment holding the query
 * @param pNext           in: current window, out: next window that may contain data
 * @param pDataBlockInfo  block meta data (row count and key range)
 * @param primaryKeys     timestamp column of the block, may be NULL
 * @param searchFn        search routine used to find the start row by key
 * @param prevPosition    last row of the previous window, or -1 if unknown
 * @return                start row of the next window, or -1 if it is not in this block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  getNextTimeWindow(pQuery, pNext);

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  bool outOfBlock = asc ? (pNext->skey > pDataBlockInfo->window.ekey) : (pNext->ekey < pDataBlockInfo->window.skey);
  if (outOfBlock) {
    return -1;
  }

  // first key of the window in scan direction, clipped to the start of the query range
  TSKEY firstKey = asc ? pNext->skey : pNext->ekey;
  if (asc) {
    if (firstKey < pQuery->window.skey) {
      firstKey = pQuery->window.skey;
    }
  } else {
    if (firstKey > pQuery->window.skey) {
      firstKey = pQuery->window.skey;
    }
  }

  int32_t rowPos = 0;

  // tumbling time window query, a special case of sliding time window query
  bool tumbling = (pQuery->interval.sliding == pQuery->interval.interval);
  if (tumbling && prevPosition != -1) {
    // the next window starts right after the previous one ended
    rowPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (asc && firstKey <= pDataBlockInfo->window.skey) {
    rowPos = 0;
  } else if (!asc && firstKey >= pDataBlockInfo->window.ekey) {
    rowPos = pDataBlockInfo->rows - 1;
  } else {
    rowPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, firstKey, pQuery->order.order);
  }

  // without a timestamp column only the block range can be checked
  if (primaryKeys == NULL) {
    if (asc) {
      assert(pDataBlockInfo->window.skey <= pNext->ekey);
    } else {
      assert(pDataBlockInfo->window.ekey >= pNext->skey);
    }

    return rowPos;
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  TSKEY rowKey = primaryKeys[rowPos];
  bool  windowIsEmpty = asc ? (rowKey > pNext->ekey) : (rowKey < pNext->skey);
  if (!windowIsEmpty) {
    return rowPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    // for these units the window is re-derived from the row key instead of being shifted
    pNext->skey = taosTimeTruncate(rowKey, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (asc) {
    // shift forward by whole sliding steps until the window end reaches the row key
    pNext->ekey += ((rowKey - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    // shift backward by whole sliding steps until the window start reaches the row key
    pNext->skey -= ((pNext->skey - rowKey + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return rowPos;
}

H
Haojun Liao 已提交
947
/**
 * Return the end key of pWindow in scan direction, clipped so that it does not run
 * past the end key of the query range (pQuery->window.ekey).
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  // descending scan: the window is left through its skey
  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
964 965
/**
 * Linear lookup of a column's data pointer by column id.
 * todo binary search
 *
 * @param pDataBlock  array of SColumnInfoData
 * @param colId       column id to look for
 * @return            pData of the first matching column, or NULL if no column matches
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
979
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
980 981 982
  if (pDataBlock == NULL) {
    return NULL;
  }
983

H
Haojun Liao 已提交
984
  char *dataBlock = NULL;
H
Haojun Liao 已提交
985
  SQuery *pQuery = pRuntimeEnv->pQuery;
986

H
Haojun Liao 已提交
987
  int32_t functionId = pQuery->pExpr1[col].base.functionId;
988
  if (functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
989
    sas->pArithExpr = &pQuery->pExpr1[col];
990

H
Haojun Liao 已提交
991
    sas->offset    = (QUERY_IS_ASC_QUERY(pQuery))? pQuery->pos : pQuery->pos - (size - 1);
H
Haojun Liao 已提交
992
    sas->colList   = pQuery->colList;
993
    sas->numOfCols = pQuery->numOfCols;
H
Haojun Liao 已提交
994
    sas->data      = calloc(pQuery->numOfCols, POINTER_BYTES);
995

H
Haojun Liao 已提交
996 997 998 999
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

1000
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
1001
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
1002
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
1003
      SColumnInfo *pColMsg = &pQuery->colList[i];
1004

1005 1006 1007 1008 1009 1010 1011 1012
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
1013

1014
      assert(dataBlock != NULL);
1015
      sas->data[i] = dataBlock;  // start from the offset
1016
    }
1017

1018
  } else {  // other type of query function
H
Haojun Liao 已提交
1019
    SColIndex *pCol = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
1020
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
1021
      SColIndex* pColIndex = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
1022 1023 1024 1025
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
1026 1027
    } else {
      dataBlock = NULL;
1028 1029
    }
  }
1030

1031 1032 1033
  return dataBlock;
}

H
Haojun Liao 已提交
1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045
/**
 * Mark one side of the window of every function context as "no interpolated point" by
 * setting its key to INT64_MIN.
 *
 * @param pCtx         array of function contexts
 * @param numOfOutput  number of contexts in pCtx
 * @param type         RESULT_ROW_START_INTERP resets start.key, any other value resets end.key
 */
static void setNotInterpoWindowKey(SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t type) {
  const bool startSide = (type == RESULT_ROW_START_INTERP);

  for (int32_t i = 0; i < numOfOutput; ++i) {
    if (startSide) {
      pCtx[i].start.key = INT64_MIN;
    } else {
      pCtx[i].end.key = INT64_MIN;
    }
  }
}

1046
// window start key interpolation
H
Haojun Liao 已提交
1047
static bool setTimeWindowInterpolationStartTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t pos, int32_t numOfRows, SArray* pDataBlock, TSKEY* tsCols, STimeWindow* win) {
1048 1049
  SQuery* pQuery = pRuntimeEnv->pQuery;

H
Haojun Liao 已提交
1050
  TSKEY curTs  = tsCols[pos];
1051 1052
  TSKEY lastTs = *(TSKEY *) pRuntimeEnv->prevRow[0];

H
Haojun Liao 已提交
1053 1054 1055 1056
  // lastTs == INT64_MIN and pos == 0 means this is the first time window, interpolation is not needed.
  // start exactly from this point, no need to do interpolation
  TSKEY key = QUERY_IS_ASC_QUERY(pQuery)? win->skey:win->ekey;
  if (key == curTs) {
H
Haojun Liao 已提交
1057
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
1058
    return true;
H
Haojun Liao 已提交
1059
  }
1060

H
Haojun Liao 已提交
1061
  if (lastTs == INT64_MIN && ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))) {
H
Haojun Liao 已提交
1062
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
H
Haojun Liao 已提交
1063
    return true;
1064 1065
  }

H
Haojun Liao 已提交
1066 1067 1068
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  TSKEY   prevTs = ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))?
      lastTs:tsCols[pos - step];
1069

H
Haojun Liao 已提交
1070 1071 1072
  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, pos - step, curTs, pos, key, RESULT_ROW_START_INTERP);
  return true;
}
1073

H
Haojun Liao 已提交
1074 1075 1076
/**
 * window end key interpolation
 *
 * Computes the interpolated value at the end key of `win` (ekey for ascending, skey for
 * descending scans) from the last row of the window and the row following it.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param endRowIndex  index of the last row of the window in this block
 * @param pDataBlock   array of SColumnInfoData of the block
 * @param tsCols       timestamp column of the block
 * @param blockEkey    last key of the block in scan direction
 * @param win          the time window
 * @return             false if the window does not end in this block, true otherwise
 */
static bool setTimeWindowInterpolationEndTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t endRowIndex, SArray* pDataBlock, TSKEY* tsCols, TSKEY blockEkey, STimeWindow* win) {
  SQuery*    pQuery = pRuntimeEnv->pQuery;
  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  TSKEY lastRowKey = tsCols[endRowIndex];
  TSKEY key = asc? win->ekey:win->skey;

  // not ended in current data block, do not invoke interpolation
  bool endsLater = asc? (key > blockEkey):(key < blockEkey);

  // a row sitting exactly on the end key is the actual end point, no interpolation needed either
  if (endsLater || key == lastRowKey) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
    return !endsLater;
  }

  int32_t nextRowIndex = endRowIndex + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  assert(nextRowIndex >= 0);

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, lastRowKey, endRowIndex, tsCols[nextRowIndex], nextRowIndex, key,
                                   RESULT_ROW_END_INTERP);
  return true;
}

/**
 * Copy the last row of the block in scan direction (row rows-1 for ascending, row 0 for
 * descending scans) into pRuntimeEnv->prevRow, one slot per block column.
 * Does nothing when pDataBlock is NULL.
 */
static void saveDataBlockLastRow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray* pDataBlock) {
  if (pDataBlock == NULL) {
    return;
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t lastRow = 0;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    lastRow = pDataBlockInfo->rows-1;
  }

  int32_t col = 0;
  while (col < pQuery->numOfCols) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, col);

    char *src = ((char*)pColData->pData) + (pColData->info.bytes * lastRow);
    memcpy(pRuntimeEnv->prevRow[col], src, pColData->info.bytes);

    ++col;
  }
}

/**
 * Timestamp the scan of the current block starts from.
 *
 * @param tsCols  timestamp column of the block; if NULL the block range is used instead
 * @param step    scan direction factor, forwarded to GET_COL_DATA_POS
 * @return        timestamp at the current query position, or the first key of the block in
 *                scan direction when no timestamp column is available
 */
static TSKEY getStartTsKey(SQuery* pQuery, SDataBlockInfo* pDataBlockInfo, TSKEY* tsCols, int32_t step) {
  if (tsCols != NULL) {
    int32_t rowPos = GET_COL_DATA_POS(pQuery, 0, step);
    return tsCols[rowPos];
  }

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return pDataBlockInfo->window.skey;
  }

  return pDataBlockInfo->window.ekey;
}

1127
/**
H
Haojun Liao 已提交
1128
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
1129 1130
 * @param pRuntimeEnv
 * @param forwardStep
1131
 * @param tsCols
1132 1133 1134 1135 1136
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
H
Haojun Liao 已提交
1137
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
H
Haojun Liao 已提交
1138
                                    SResultRowInfo *pWindowResInfo, __block_search_fn_t searchFn, SArray *pDataBlock) {
1139
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1140
  bool            masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1141

1142 1143
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
1144
  if (pDataBlock != NULL) {
1145
    SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, 0);
1146
    tsCols = (TSKEY *)(pColInfo->pData);
1147
  }
1148

1149
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1150 1151 1152
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1153

1154
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1155
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1156
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1157
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1158
  }
1159

1160
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1161
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1162 1163
    int32_t prevIndex = curTimeWindowIndex(pWindowResInfo);

1164
    TSKEY ts = getStartTsKey(pQuery, pDataBlockInfo, tsCols, step);
H
Haojun Liao 已提交
1165
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1166

1167 1168 1169 1170
    bool hasTimeWindow  = false;
    SResultRow* pResult = NULL;
    int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult);
    if (ret != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
1171
      tfree(sasArray);
H
hjxilinx 已提交
1172
      return;
1173
    }
1174

H
Haojun Liao 已提交
1175 1176 1177
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

H
Haojun Liao 已提交
1178
    // in case of repeat scan/reverse scan, no new time window added.
1179
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1180
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1181
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1182

H
Haojun Liao 已提交
1183 1184
      // prev time window not interpolation yet.
      int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
H
Haojun Liao 已提交
1185
      if (prevIndex != -1 && prevIndex < curIndex && pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206
        for(int32_t j = prevIndex; j < curIndex; ++j) {
          SResultRow *pRes = pWindowResInfo->pResult[j];

          STimeWindow w = pRes->win;
          ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &w, masterScan, &hasTimeWindow, &pResult);
          assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));

          int32_t p = QUERY_IS_ASC_QUERY(pQuery)? 0:pDataBlockInfo->rows-1;
          doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, *(TSKEY*) pRuntimeEnv->prevRow[0], -1,  tsCols[0], p, w.ekey, RESULT_ROW_END_INTERP);
          setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);

          bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doBlockwiseApplyFunctions(pRuntimeEnv, closed, &w, startPos, 0, tsCols, pDataBlockInfo->rows);
        }

        // restore current time window
        ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult);
        assert (ret == TSDB_CODE_SUCCESS);  // null data, too many state code
      }

1207 1208
      // window start key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1209 1210 1211 1212
        bool done = resultRowInterpolated(pResult, RESULT_ROW_START_INTERP);
        if (!done) {
          int32_t startRowIndex = pQuery->pos;
          bool    interp = setTimeWindowInterpolationStartTs(pRuntimeEnv, startRowIndex, pDataBlockInfo->rows, pDataBlock, tsCols, &win);
1213 1214 1215
          if (interp) {
            setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
          }
H
Haojun Liao 已提交
1216 1217
        } else {
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
1218 1219
        }

H
Haojun Liao 已提交
1220 1221 1222 1223 1224 1225
        done = resultRowInterpolated(pResult, RESULT_ROW_END_INTERP);
        if (!done) {
          int32_t endRowIndex = pQuery->pos + (forwardStep - 1) * step;

          TSKEY endKey = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
          bool  interp = setTimeWindowInterpolationEndTs(pRuntimeEnv, endRowIndex, pDataBlock, tsCols, endKey, &win);
1226 1227 1228
          if (interp) {
            setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
          }
H
Haojun Liao 已提交
1229 1230
        } else {
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
1231 1232 1233 1234
        }
      }

      bool pStatus = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1235
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1236
    }
1237

1238 1239
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1240

1241
    while (1) {
H
Haojun Liao 已提交
1242 1243
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1244 1245 1246
      if (startPos < 0) {
        break;
      }
1247

1248
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1249
      hasTimeWindow = false;
1250 1251
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow, &pResult) !=
          TSDB_CODE_SUCCESS) {
1252 1253
        break;
      }
1254

1255 1256 1257 1258 1259
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1260
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1261

1262 1263
      // window start(end) key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1264 1265 1266 1267
        bool done = resultRowInterpolated(pResult, RESULT_ROW_START_INTERP);
        if (!done) {
          int32_t startRowIndex = startPos;
          bool    interp = setTimeWindowInterpolationStartTs(pRuntimeEnv, startRowIndex, pDataBlockInfo->rows, pDataBlock, tsCols, &nextWin);
1268 1269 1270
          if (interp) {
            setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
          }
H
Haojun Liao 已提交
1271 1272
        } else {
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
1273 1274
        }

H
Haojun Liao 已提交
1275 1276 1277 1278 1279
        done = resultRowInterpolated(pResult, RESULT_ROW_END_INTERP);
        if (!done) {
          int32_t endRowIndex = startPos + (forwardStep - 1)*step;
          TSKEY endKey = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
          bool  interp = setTimeWindowInterpolationEndTs(pRuntimeEnv, endRowIndex, pDataBlock, tsCols, endKey, &nextWin);
1280 1281 1282
          if (interp) {
            setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
          }
H
Haojun Liao 已提交
1283 1284
        } else {
          setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
1285 1286 1287 1288
        }
      }

      bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
1289
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1290
    }
1291

1292 1293 1294 1295 1296 1297 1298
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1299
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1300
      int32_t functionId = pQuery->pExpr1[k].base.functionId;
1301
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
1302
        pCtx[k].nStartQueryTimestamp = pDataBlockInfo->window.skey;
1303 1304 1305 1306
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1307

1308 1309 1310 1311 1312
  if (pRuntimeEnv->timeWindowInterpo) {
    saveDataBlockLastRow(pRuntimeEnv, pDataBlockInfo, pDataBlock);
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1313
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1314 1315
      continue;
    }
1316

S
TD-1848  
Shengliang Guan 已提交
1317
    tfree(sasArray[i].data);
1318
  }
1319

S
TD-1848  
Shengliang Guan 已提交
1320
  tfree(sasArray);
1321 1322
}

1323
/**
 * Select (and create if necessary) the result row of the group identified by the group-by
 * value pData and make it the output buffer of the function contexts.
 *
 * Grouping by float/double columns is rejected: the error is logged and control leaves
 * through longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR). Failing to allocate the copy
 * of a binary/nchar group key leaves through longjmp with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * @param pRuntimeEnv  runtime environment (result buffer, window result info, jump buffer)
 * @param pData        group-by value of the current row
 * @param type         data type of pData
 * @param bytes        size of pData for fixed length types
 * @param groupIndex   group id, used as uid of the result row
 * @return             TSDB_CODE_SUCCESS, or -1 if the value is NULL or no result row/buffer is available
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // not assign result buffer yet, add new result buffer, TODO remove it
  char* d = pData;
  int16_t len = bytes;
  if (type == TSDB_DATA_TYPE_BINARY||type == TSDB_DATA_TYPE_NCHAR) {
    // variable length value: the key is the payload without the length header
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  uint64_t uid = groupIndex;
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, uid);
  if (pResultRow == NULL) {
    return -1;
  }

  int64_t v = -1;
  GET_TYPED_DATA(v, int64_t, type, pData);
  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    if (pResultRow->key == NULL) {
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {
        // varDataCopy below would write through a NULL pointer
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pResultRow->key, pData);
    } else {
      assert(memcmp(pResultRow->key, pData, varDataTLen(pData)) == 0);
    }
  } else {
    // numeric group key: stored in the window of the result row
    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1377
/**
 * Find the data of the first non-tag group-by column that is present in the data block.
 *
 * @param pQuery      query descriptor (group-by expression and column list)
 * @param type        out: data type of the examined group-by column
 * @param bytes       out: byte width of the examined group-by column
 * @param pDataBlock  array of SColumnInfoData
 * @return            data of the group-by column, or NULL if no such column is in the block
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // position of the group-by column in the column list of the query
    int16_t slot = -1;
    int32_t wantedId = pGroupCol->colId;

    int32_t c = 0;
    while (c < pQuery->numOfCols && pQuery->colList[c].colId != wantedId) {
      ++c;
    }

    if (c < pQuery->numOfCols) {
      slot = c;
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pGroupCol->colId == pColData->info.colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/**
 * Check the row at `offset` of the input of the first function context against the current
 * element of the timestamp buffer (pRuntimeEnv->pTsBuf).
 *
 * @param pRuntimeEnv  runtime environment (query, function contexts, timestamp buffer)
 * @param offset       row index into the input timestamps of pCtx[0]
 * @return             TS_JOIN_TAG_NOT_EQUALS if the tags differ, TS_JOIN_TS_NOT_EQUALS if the row
 *                     has not reached the buffered timestamp yet, TS_JOIN_TS_EQUAL otherwise
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTsBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is handed to tVariantCompare as a pointer above, so its members are reached with ->
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%" PRId64 ", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag->i64Key, pQuery->order.order, pRuntimeEnv->pTsBuf->tsOrder,
         pRuntimeEnv->pTsBuf->cur.order, pRuntimeEnv->pTsBuf->cur.tsIndex);
#endif

  // a row that is already past the buffered timestamp (in scan direction) is not expected here
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether a function has to be invoked for the current scan.
 *
 * @param pRuntimeEnv  runtime environment (query and scan flag)
 * @param pCtx         context of the function
 * @param functionId   id of the function
 * @return             true if the function should be executed
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // the result of this function is already complete
  if (pResInfo->complete) {
    return false;
  }

  switch (functionId) {
    case TSDB_FUNC_TAG_DUMMY:
    case TSDB_FUNC_TS_DUMMY:
      return false;

    case TSDB_FUNC_FIRST_DST:
    case TSDB_FUNC_FIRST:
      // first/first_dst only run while scanning in ascending order
      return QUERY_IS_ASC_QUERY(pQuery);

    case TSDB_FUNC_LAST_DST:
    case TSDB_FUNC_LAST:
      // denote the order type
      return pCtx->param[0].i64Key == pQuery->order.order;

    default:
      // in the supplementary scan, only the functions handled above need to be executed
      return !IS_REVERSE_SCAN(pRuntimeEnv);
  }
}

H
Haojun Liao 已提交
1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522
/**
 * Linearly interpolate, for every TSDB_FUNC_TWA output expression, the column value at
 * windowKey from two neighbouring rows and store it as the start or end point of the
 * function context. For all other functions start.key is reset to INT64_MIN.
 *
 * @param pRuntimeEnv   runtime environment (query, function contexts, saved previous row)
 * @param pDataBlock    array of SColumnInfoData of the current block
 * @param prevTs        timestamp of the first point
 * @param prevRowIndex  row of the first point in the block, or -1 to use the row saved in prevRow
 * @param curTs         timestamp of the second point, must differ from windowKey
 * @param curRowIndex   row of the second point in the block
 * @param windowKey     timestamp to interpolate at
 * @param type          RESULT_ROW_START_INTERP fills pCtx[k].start, any other value fills pCtx[k].end
 */
void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey,  int32_t type) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t functionId = pQuery->pExpr1[k].base.functionId;
    if (functionId != TSDB_FUNC_TWA) {
      // NOTE(review): start.key is reset here even when type asks for the end point - confirm
      pRuntimeEnv->pCtx[k].start.key = INT64_MIN;
      continue;
    }

    SColIndex* pColIndex = &pQuery->pExpr1[k].base.colInfo;
    int16_t index = pColIndex->colIndex;
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, index);

    assert(pColInfo->info.colId == pColIndex->colId && curTs != windowKey);
    double v1 = 0, v2 = 0, v = 0;

    if (prevRowIndex == -1) {
      // prevRow is filled per block column (see saveDataBlockLastRow), so it has to be addressed
      // with the column position, not with the index of the output expression
      GET_TYPED_DATA(v1, double, pColInfo->info.type, (char *)pRuntimeEnv->prevRow[index]);
    } else {
      GET_TYPED_DATA(v1, double, pColInfo->info.type, (char *)pColInfo->pData + prevRowIndex * pColInfo->info.bytes);
    }

    GET_TYPED_DATA(v2, double, pColInfo->info.type, (char *)pColInfo->pData + curRowIndex * pColInfo->info.bytes);

    SPoint point1 = (SPoint){.key = prevTs, .val = &v1};
    SPoint point2 = (SPoint){.key = curTs, .val = &v2};
    SPoint point  = (SPoint){.key = windowKey, .val = &v};
    taosGetLinearInterpolationVal(TSDB_DATA_TYPE_DOUBLE, &point1, &point2, &point);

    if (type == RESULT_ROW_START_INTERP) {
      pRuntimeEnv->pCtx[k].start.key = point.key;
      pRuntimeEnv->pCtx[k].start.val = v;
    } else {
      pRuntimeEnv->pCtx[k].end.key = point.key;
      pRuntimeEnv->pCtx[k].end.val = v;
    }
  }
}

H
Haojun Liao 已提交
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559
/*
 * Prepare the start-boundary interpolation state of a time window before a row is applied.
 *
 * If the result row is already flagged as start-interpolated, the start key of all contexts is
 * marked as "not interpolated" and nothing else happens. Otherwise the window boundary (skey
 * for ascending queries, ekey for descending ones) is compared with the row timestamp:
 *   - equal: the row is flagged as interpolated without computing anything;
 *   - a valid previous timestamp lies before the boundary in scan direction: the boundary value
 *     is interpolated and the row is flagged;
 *   - otherwise the start key is marked as not interpolated.
 * In all of these cases the end key is marked as not interpolated and every context size is
 * set to 1.
 */
static void setTimeWindowSKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (resultRowInterpolated(pResult, RESULT_ROW_START_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    return;
  }

  bool  ascending = QUERY_IS_ASC_QUERY(pQuery);
  TSKEY boundary  = ascending ? win->skey : win->ekey;

  if (boundary == ts) {
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  } else if (prevTs != INT64_MIN && (ascending ? (prevTs < boundary) : (prevTs > boundary))) {
    doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, boundary, RESULT_ROW_START_INTERP);
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  } else {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
  }

  setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    pRuntimeEnv->pCtx[col].size = 1;
  }
}

/*
 * Interpolate the end boundary of a time window and flag the result row accordingly.
 *
 * The boundary is ekey for ascending queries and skey for descending ones. After the
 * interpolation the start key of every context is marked as not interpolated and every
 * context size is set to 0.
 */
static void setTimeWindowEKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  TSKEY boundary;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    boundary = win->ekey;
  } else {
    boundary = win->skey;
  }

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, boundary, RESULT_ROW_END_INTERP);
  setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);

  setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    pRuntimeEnv->pCtx[col].size = 0;
  }
}

1560
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
H
Haojun Liao 已提交
1561
    SResultRowInfo *pWindowResInfo, SArray *pDataBlock) {
1562
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1563
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1564

1565
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1566
  STableQueryInfo* item = pQuery->current;
H
Haojun Liao 已提交
1567 1568 1569 1570

  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
H
Haojun Liao 已提交
1571 1572
  bool    groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

1573
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1574 1575 1576
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1577

1578 1579
  int16_t type = 0;
  int16_t bytes = 0;
1580

1581
  char *groupbyColumnData = NULL;
H
Haojun Liao 已提交
1582
  if (groupbyColumnValue) {
1583
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
1584
  }
1585

H
Haojun Liao 已提交
1586
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1587
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1588
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1589
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
H
Haojun Liao 已提交
1590
    pCtx[k].size = 1;
1591
  }
1592

1593 1594
  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
1595
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
H
hjxilinx 已提交
1596 1597
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
1598
  }
1599

1600
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1601

1602 1603
  // from top to bottom in desc
  // from bottom to top in asc order
H
Haojun Liao 已提交
1604
  if (pRuntimeEnv->pTsBuf != NULL) {
1605
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
H
Haojun Liao 已提交
1606
           pQuery->order.order, pRuntimeEnv->pTsBuf->cur.order);
1607
  }
1608

H
hjxilinx 已提交
1609
  int32_t offset = -1;
H
Haojun Liao 已提交
1610
  TSKEY   prevTs = *(TSKEY*) pRuntimeEnv->prevRow[0];
H
Haojun Liao 已提交
1611
  int32_t prevRowIndex = -1;
1612

1613
  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
H
hjxilinx 已提交
1614
    offset = GET_COL_DATA_POS(pQuery, j, step);
1615

H
Haojun Liao 已提交
1616
    if (pRuntimeEnv->pTsBuf != NULL) {
1617 1618
      int32_t ret = doTSJoinFilter(pRuntimeEnv, offset);
      if (ret == TS_JOIN_TAG_NOT_EQUALS) {
1619
        break;
1620
      } else if (ret == TS_JOIN_TS_NOT_EQUALS) {
1621 1622
        continue;
      } else {
1623
        assert(ret == TS_JOIN_TS_EQUAL);
1624 1625
      }
    }
1626

1627
    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
1628 1629
      continue;
    }
1630

1631
    // interval window query, decide the time window according to the primary timestamp
H
Haojun Liao 已提交
1632
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1633
      int32_t prevWindowIndex = curTimeWindowIndex(pWindowResInfo);
H
Haojun Liao 已提交
1634
      int64_t ts  = tsCols[offset];
H
Haojun Liao 已提交
1635

1636
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1637

1638 1639 1640
      bool hasTimeWindow  = false;
      SResultRow* pResult = NULL;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult);
1641 1642 1643
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }
1644

1645 1646 1647
      if (!hasTimeWindow) {
        continue;
      }
H
Haojun Liao 已提交
1648

1649 1650
      // window start key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1651 1652 1653 1654 1655 1656
        // check for the time window end time interpolation
        int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
        if (prevWindowIndex != -1 && prevWindowIndex < curIndex) {
          for (int32_t k = prevWindowIndex; k < curIndex; ++k) {
            SResultRow *pRes = pWindowResInfo->pResult[k];

H
Haojun Liao 已提交
1657 1658
            ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &pRes->win, masterScan, &hasTimeWindow, &pResult);
            assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
H
Haojun Liao 已提交
1659

H
Haojun Liao 已提交
1660
            setTimeWindowEKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &pRes->win);
H
Haojun Liao 已提交
1661 1662

            bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1663
            doRowwiseApplyFunctions(pRuntimeEnv, closed, &pRes->win, offset);
H
Haojun Liao 已提交
1664 1665 1666 1667 1668 1669 1670
          }

          // restore current time window
          ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow,
                                        &pResult);
          if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
            continue;
1671 1672
          }
        }
1673

H
Haojun Liao 已提交
1674
        setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &win);
1675
      }
H
Haojun Liao 已提交
1676

1677
      bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
1678
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);
1679

1680 1681
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;
1682

1683
      while (1) {
H
Haojun Liao 已提交
1684
        getNextTimeWindow(pQuery, &nextWin);
1685
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
H
Haojun Liao 已提交
1686
            (nextWin.ekey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
1687 1688
          break;
        }
1689

1690 1691 1692
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }
1693

1694
        // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1695
        hasTimeWindow = false;
1696
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow, &pResult) != TSDB_CODE_SUCCESS) {
1697 1698
          break;
        }
1699

1700
        if (hasTimeWindow) {
H
Haojun Liao 已提交
1701
          setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &nextWin);
1702
          closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
1703
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
1704
        }
1705
      }
1706

1707 1708 1709
      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
H
Haojun Liao 已提交
1710
      if (groupbyColumnValue) {
H
hjxilinx 已提交
1711
        char *val = groupbyColumnData + bytes * offset;
1712

1713
        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, item->groupIndex);
1714 1715 1716 1717
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }
1718

1719
      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1720
        int32_t functionId = pQuery->pExpr1[k].base.functionId;
1721 1722 1723 1724 1725
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }
1726

H
Haojun Liao 已提交
1727 1728
    prevTs = tsCols[offset];
    prevRowIndex = offset;
1729

H
Haojun Liao 已提交
1730
    if (pRuntimeEnv->pTsBuf != NULL) {
1731
      // if timestamp filter list is empty, quit current query
H
Haojun Liao 已提交
1732
      if (!tsBufNextPos(pRuntimeEnv->pTsBuf)) {
H
hjxilinx 已提交
1733
        setQueryStatus(pQuery, QUERY_COMPLETED);
1734 1735 1736 1737
        break;
      }
    }
  }
H
Haojun Liao 已提交
1738 1739 1740 1741 1742 1743 1744 1745

  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    item->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

H
Haojun Liao 已提交
1746 1747
  if (pRuntimeEnv->pTsBuf != NULL) {
    item->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
1748
  }
H
Haojun Liao 已提交
1749

1750 1751
  // todo refactor: extract method
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1752
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1753 1754
      continue;
    }
1755

S
TD-1848  
Shengliang Guan 已提交
1756
    tfree(sasArray[i].data);
1757
  }
1758

1759 1760 1761 1762
  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1763
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1764
  SQuery *pQuery = pRuntimeEnv->pQuery;
1765

H
hjxilinx 已提交
1766
  STableQueryInfo* pTableQInfo = pQuery->current;
H
Haojun Liao 已提交
1767
  SResultRowInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1768

H
Haojun Liao 已提交
1769
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1770
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1771
  } else {
1772
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1773
  }
1774

1775
  // update the lastkey of current table
1776
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1777
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1778

1779
  // interval query with limit applied
1780
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1781
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1782
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
H
Haojun Liao 已提交
1783
  } else if (pRuntimeEnv->groupbyNormalCol) {
H
Haojun Liao 已提交
1784
    closeAllResultRows(pWindowResInfo);
H
Haojun Liao 已提交
1785 1786
    numOfRes = pWindowResInfo->size;
  } else { // projection query
S
TD-1057  
Shengliang Guan 已提交
1787
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1788

1789 1790 1791 1792
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1793

1794 1795 1796
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1797

1798 1799 1800
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1801 1802 1803 1804 1805

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1806
    }
1807
  }
1808

1809
  return numOfRes;
1810 1811
}

H
Haojun Liao 已提交
1812
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1813
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1814

H
Haojun Liao 已提交
1815 1816
  int32_t functionId = pQuery->pExpr1[colIndex].base.functionId;
  int32_t colId = pQuery->pExpr1[colIndex].base.colInfo.colId;
1817

1818
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1819
  pCtx->hasNull = hasNullValue(&pQuery->pExpr1[colIndex].base.colInfo, pStatis, &tpField);
1820
  pCtx->aInputElemBuf = inputData;
1821

1822
  if (tpField != NULL) {
H
Haojun Liao 已提交
1823
    pCtx->preAggVals.isSet  = true;
1824 1825
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1826 1827 1828
  } else {
    pCtx->preAggVals.isSet = false;
  }
1829

H
Haojun Liao 已提交
1830 1831
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1832 1833
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1834

H
Haojun Liao 已提交
1835
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1836 1837
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1838

1839 1840
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1841
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1842
  }
1843

1844 1845 1846 1847 1848
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1849
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1850
    /*
H
Haojun Liao 已提交
1851
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
H
Haojun Liao 已提交
1852
     * timestamp column, and the y-value is the column specified in pQuery->pExpr1[i].colIdxInBuffer
1853 1854 1855 1856 1857
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
1858 1859 1860 1861
       pCtx->param[1].i64Key = pQuery->window.skey;
       pCtx->param[1].nType = TSDB_DATA_TYPE_BIGINT;
       pCtx->param[2].i64Key = pQuery->window.ekey;
       pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
1862
    }
1863

1864 1865
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1866 1867 1868 1869 1870 1871
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1872
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1873 1874 1875
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1876
    pInterpInfo->type = (int8_t)pQuery->fillType;
1877 1878
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1879

1880 1881 1882 1883
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1884 1885 1886
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1887 1888
      }
    }
H
Haojun Liao 已提交
1889 1890 1891
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1892
  }
1893

1894 1895 1896 1897 1898 1899
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1900
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1901 1902 1903
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1904
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1905 1906 1907 1908 1909 1910
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1911
/*
 * Attach the tag/ts dummy output contexts to the selectivity function of the query.
 *
 * Nothing is done unless isSelectivityWithTagsQuery() holds. Otherwise every output whose
 * function is TSDB_FUNC_TAG_DUMMY or TSDB_FUNC_TS_DUMMY is collected into a list, and the list
 * (with its length and the accumulated output bytes) is stored in the tagInfo of the last
 * context whose function has the selectivity flag. If no such context exists the list is freed.
 *
 * Returns TSDB_CODE_QRY_OUT_OF_MEMORY if the list cannot be allocated, TSDB_CODE_SUCCESS
 * otherwise.
 */
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!isSelectivityWithTagsQuery(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  SQLFunctionCtx **tagCtxList = calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (tagCtxList == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  int32_t         numOfTags     = 0;
  int16_t         totalTagBytes = 0;
  SQLFunctionCtx *pSelectCtx    = NULL;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pExpr1[idx].base.functionId;

    if (funcId == TSDB_FUNC_TAG_DUMMY || funcId == TSDB_FUNC_TS_DUMMY) {
      totalTagBytes += pCtx[idx].outputBytes;
      tagCtxList[numOfTags++] = &pCtx[idx];
    } else if ((aAggs[funcId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pSelectCtx = &pCtx[idx];
    }
    // anything else needs no handling here: TSDB_FUNC_TS/TSDB_FUNC_TAG may be the required
    // primary timestamp or the group by tag column, and a normal column (group by
    // normal_column) uses TSDB_FUNC_PRJ
  }

  if (pSelectCtx == NULL) {
    tfree(tagCtxList);
  } else {
    pSelectCtx->tagInfo.pTagCtxList = tagCtxList;
    pSelectCtx->tagInfo.numOfTagCols = numOfTags;
    pSelectCtx->tagInfo.tagsLen = totalTagBytes;
  }

  return TSDB_CODE_SUCCESS;
}

1952
/*
 * Allocate and initialize the function contexts and the output offset tables of a query.
 *
 * For every output expression the input/output type information, the scan order, the function
 * id and the function parameters are copied into its context; offset[] and rowCellInfoOffset[]
 * receive the accumulated byte offsets of the outputs. `order` is stored as param[2] of
 * top/bottom/diff functions.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY after releasing everything that
 * was allocated here (including the parameter variants created for the contexts).
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->offset = calloc(pQuery->numOfOutput, sizeof(int16_t));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));

  if (pRuntimeEnv->offset == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the "null required" request is moved from the column flag into the context
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // input type information depends on where the column comes from: tag, user defined
    // constant column, or normal column
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes  = pQuery->pExpr1[i].bytes;
    pCtx->outputType   = pQuery->pExpr1[i].type;

    pCtx->order        = pQuery->order.order;
    pCtx->functionId   = pSqlFuncMsg->functionId;
    pCtx->stableQuery  = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pExpr1[i].interBytes;
    pCtx->start.key    = INT64_MIN;
    pCtx->end.key      = INT64_MIN;

    pCtx->numOfParams  = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pExpr1[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offsets are prefix sums over the preceding outputs
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pExpr1[i - 1].interBytes;
    }

  }

  *(int64_t*) pRuntimeEnv->prevRow[0] = INT64_MIN;

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameter variants created above before the context array itself is freed;
  // once pCtx is gone nobody can reach them anymore. The contexts are zero-initialized by
  // calloc, so numOfParams is 0 for every context that has not been set up yet.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  tfree(pRuntimeEnv->pCtx);
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078
/*
 * Clean up both tsdb query handles of the query and reset them to NULL.
 * Afterwards the memory reference of the query is expected to be fully released.
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);

  pEnv->pSecQueryHandle = NULL;
  pEnv->pQueryHandle = NULL;

  SMemRef* pRef = &pQInfo->memRef;
  assert(pRef->ref == 0 && pRef->imem == NULL && pRef->mem == NULL);
}

2079 2080 2081 2082
/*
 * Release every resource held by the runtime environment of a query: result row info,
 * function contexts (parameters, tag variant, tag context list), fill info, result buffer,
 * query handles, ts buffer, offset/key/prev-row buffers, the result row hash table and the
 * result row pool. Does nothing if no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupResultRowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[idx];

      for (int32_t p = 0; p < pFuncCtx->numOfParams; ++p) {
        tVariantDestroy(&pFuncCtx->param[p]);
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);

  pRuntimeEnv->pTsBuf = tsBufDestroy(pRuntimeEnv->pTsBuf);

  // plain buffers owned by the runtime environment
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);
  tfree(pRuntimeEnv->prevRow);

  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

2123 2124 2125 2126
/* Returns true if a response context is attached to the query. */
static bool needBuildResAfterQueryComplete(SQInfo* pQInfo) {
  bool hasRspContext = (pQInfo->rspContext != NULL);
  return hasRspContext;
}

H
Haojun Liao 已提交
2127
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
2128

2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147
static bool isQueryKilled(SQInfo *pQInfo) {
  if (IS_QUERY_KILLED(pQInfo)) {
    return true;
  }

  // query has been executed more than tsShellActivityTimer, and the retrieve has not arrived
  // abort current query execution.
  if (pQInfo->owner != 0 && ((taosGetTimestampSec() - pQInfo->startExecTs) > getMaximumIdleDurationSec()) &&
      (!needBuildResAfterQueryComplete(pQInfo))) {

    assert(pQInfo->startExecTs != 0);
    qDebug("QInfo:%p retrieve not arrive beyond %d sec, abort current query execution, start:%"PRId64", current:%d", pQInfo, 1,
           pQInfo->startExecTs, taosGetTimestampSec());
    return true;
  }

  return false;
}

H
Haojun Liao 已提交
2148
/* Mark the query as cancelled so that IS_QUERY_KILLED() reports it as killed. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
2149

H
Haojun Liao 已提交
2150 2151 2152
/*
 * Decide whether the query is a fixed output query.
 *
 * Interval queries never are. Top/bottom queries and group-by normal column queries always
 * are. Otherwise the query is a fixed output query if at least one output function, other than
 * the timestamp helpers that are skipped, is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[idx].base;

    // ignore the ts_comp function
    bool leadingTsProjection = (idx == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    if (leadingTsProjection) {
      continue;
    }

    bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (tsFunction) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

2182
// todo refactor with isLastRowQuery
2183
/* Returns true if any output expression of the query is the TSDB_FUNC_INTERP function. */
bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }

    ++idx;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
2195
/*
 * Returns true if any output expression of the query is one of the sum_rate, sum_irate,
 * avg_rate or avg_irate functions.
 */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pExpr1[idx].base.functionId;

    // the timestamp function can never match, skip it right away
    if (funcId == TSDB_FUNC_TS) {
      continue;
    }

    bool sumRate = (funcId == TSDB_FUNC_SUM_RATE) || (funcId == TSDB_FUNC_SUM_IRATE);
    bool avgRate = (funcId == TSDB_FUNC_AVG_RATE) || (funcId == TSDB_FUNC_AVG_IRATE);
    if (sumRate || avgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
2211
/* Returns true if any output expression of the query is the TSDB_FUNC_LAST_ROW function. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++idx;
  }

  return false;
}

H
hjxilinx 已提交
2222
/*
 * Decide whether a second scan in the opposite direction is required.
 *
 * Returns true when
 *  - a first/first_dst expression is evaluated by a query that is not in ascending order, or
 *  - a last/last_dst expression carries an order argument that differs from the query order.
 * Timestamp and tag expressions are ignored.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo *pExpr = &pQuery->pExpr1[idx];
    int32_t    fid = pExpr->base.functionId;

    bool ignored = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (ignored) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the first argument holds the scan order needed to acquire the last result of the column
      int32_t requiredOrder = (int32_t)pExpr->base.arg->argValue.i64;
      if (requiredOrder != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
2244

H
Haojun Liao 已提交
2245 2246 2247 2248
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
2249 2250
/*
 * Returns true only when every output expression is one of the tag-only kinds:
 * tagprj, tid_tag, count(tbname), or a projection of a user-defined constant column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pExpr1[idx];
    int32_t    fid = pExpr->base.functionId;

    bool tagProject  = (fid == TSDB_FUNC_TAGPRJ);
    bool tidTag      = (fid == TSDB_FUNC_TID_TAG);
    bool countTbname = (fid == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool userConst   = (fid == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExpr->base.colInfo.flag));

    if (!(tagProject || tidTag || countTbname || userConst)) {
      return false;
    }
  }

  return true;
}

2266 2267
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
2268
/*
 * Compute the interval-aligned time window that contains `key`.
 *
 * win->skey is `key` truncated by taosTimeTruncate() according to the query interval.
 * win->ekey is the inclusive end of that window:
 *  - INT64_MAX when keyFirst is so large that keyFirst + interval would exceed INT64_MAX,
 *  - computed by taosTimeAdd() for the 'n' and 'y' interval units,
 *  - skey + interval - 1 otherwise.
 *
 * Requires keyFirst <= key <= keyLast and sliding <= interval (asserted).
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);

  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * If keyFirst > INT64_MAX - interval, the range between keyFirst and keyLast must be shorter than
   * one interval, so the end of the window is simply clamped and no further adjustment is needed.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  bool calendarUnit = (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y');
  if (calendarUnit) {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else {
    win->ekey = win->skey + pQuery->interval.interval - 1;
  }
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column queries. Otherwise it is 1
 * only when at least one non-timestamp output expression exists and all of them are
 * multi-output functions according to aAggs[].nStatus.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[fid].nStatus)) {
      // one single-output function is enough to disable the buffer check
      allMultiOutput = false;
      break;
    }

    allMultiOutput = true;
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
2312
/*
 * Validate the list of columns to load: two neighbouring entries of colList must not share
 * the same column id. Returns false (and logs an error) on the first duplicate pair.
 */
bool colIdCheck(SQuery *pQuery) {
  for (int32_t next = 1; next < pQuery->numOfCols; ++next) {
    int32_t prev = next - 1;

    if (pQuery->colList[prev].colId == pQuery->colList[next].colId) {
      // NOTE(review): GET_QINFO_ADDR subtracts offsetof(SQInfo, runtimeEnv); passing an SQuery
      // pointer here presumably logs a misleading address -- confirm against the caller.
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
      return false;
    }
  }

  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Returns true when every output expression, apart from timestamp and tag helper expressions,
 * uses either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;

    bool helperExpr = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                      (fid == TSDB_FUNC_TAG_DUMMY);
    if (helperExpr) {
      continue;
    }

    bool matches = (fid == functId) || (fid == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

// true when all effective output expressions are first/first_dst
static bool onlyFirstQuery(SQuery *pQuery) {
  bool firstOnly = onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
  return firstOnly;
}

// true when all effective output expressions are last/last_dst
static bool onlyLastQuery(SQuery *pQuery) {
  bool lastOnly = onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
  return lastOnly;
}

H
Haojun Liao 已提交
2347
// todo refactor, add iterator
2348 2349
/*
 * Walk every table of every group in pQInfo->tableGroupInfo.pGroupList and move the table's
 * lastKey from win->ekey to win->skey. Tables whose lastKey differs from win->ekey are untouched.
 */
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t numOfGroups = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);

  for (size_t g = 0; g < numOfGroups; ++g) {
    SArray* pTableList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);
    size_t  numOfTables = taosArrayGetSize(pTableList);

    for (size_t k = 0; k < numOfTables; ++k) {
      STableKeyInfo* pKeyInfo = taosArrayGet(pTableList, k);

      // only keys that still sit on the other end of the window are moved
      if (pKeyInfo->lastKey != win->ekey) {
        continue;
      }

      pKeyInfo->lastKey = win->skey;
    }
  }
}

2365
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
H
Haojun Liao 已提交
2366 2367
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

2368 2369 2370
  // in case of point-interpolation query, use asc order scan
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;
2371

2372 2373
  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
H
Haojun Liao 已提交
2374
  if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
2375
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);
2376

H
Haojun Liao 已提交
2377
    pQuery->order.order = TSDB_ORDER_ASC;
H
Haojun Liao 已提交
2378 2379 2380
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }
2381

2382 2383
    return;
  }
2384

H
Haojun Liao 已提交
2385
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
H
Haojun Liao 已提交
2386
    pQuery->order.order = TSDB_ORDER_ASC;
H
Haojun Liao 已提交
2387 2388 2389
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }
H
Haojun Liao 已提交
2390

2391
    doExchangeTimeWindow(pQInfo, &pQuery->window);
H
Haojun Liao 已提交
2392 2393 2394
    return;
  }

2395
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
H
Haojun Liao 已提交
2396 2397 2398 2399 2400
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, GET_QINFO_ADDR(pQuery), "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }
2401

2402
    pQuery->order.order = TSDB_ORDER_ASC;
2403 2404
    return;
  }
2405

2406
  if (pQuery->interval.interval == 0) {
2407 2408
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
2409
        qDebug(msg, GET_QINFO_ADDR(pQuery), "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
2410 2411
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

2412
        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
2413
        doExchangeTimeWindow(pQInfo, &pQuery->window);
2414
      }
2415

2416
      pQuery->order.order = TSDB_ORDER_ASC;
2417 2418
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
2419
        qDebug(msg, GET_QINFO_ADDR(pQuery), "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
2420 2421
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

2422
        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
2423
        doExchangeTimeWindow(pQInfo, &pQuery->window);
2424
      }
2425

2426
      pQuery->order.order = TSDB_ORDER_DESC;
2427
    }
2428

2429
  } else {  // interval query
2430
    if (stableQuery) {
2431 2432
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
2433
          qDebug(msg, GET_QINFO_ADDR(pQuery), "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
2434 2435
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

2436
          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
2437
          doExchangeTimeWindow(pQInfo, &pQuery->window);
2438
        }
2439

2440
        pQuery->order.order = TSDB_ORDER_ASC;
2441 2442
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
2443
          qDebug(msg, GET_QINFO_ADDR(pQuery), "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
2444 2445
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

2446
          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
2447
          doExchangeTimeWindow(pQInfo, &pQuery->window);
2448
        }
2449

2450
        pQuery->order.order = TSDB_ORDER_DESC;
2451 2452 2453 2454 2455 2456 2457 2458
      }
    }
  }
}

/*
 * Pick the initial number of result pages for the query:
 *  - 128 for group-by-normal-column queries,
 *  - max(number of tables, 16) for interval queries (one page per table),
 *  - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  size_t  INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t num = 1;  // for super table query, one page for each subset (pQInfo->pSidSet->numOfSubSet)

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    num = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    num = (int32_t)((numOfTables > INITIAL_RESULT_ROWS_VALUE) ? numOfTables : INITIAL_RESULT_ROWS_VALUE);
  }

  assert(num > 0);
  return num;
}

2475 2476
/*
 * Compute the row size and the page size of the intermediate result buffer.
 *
 * *rowsize receives pQuery->rowSize scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT().
 * *ps receives the page size: it starts at DEFAULT_INTERN_BUF_PAGE_SIZE and is doubled until a
 * page, minus the tFilePage header, can hold at least MIN_ROWS_PER_PAGE rows.
 * pRuntimeEnv->numOfRowsPerPage is updated accordingly.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t MIN_ROWS_PER_PAGE = 4;
  int32_t overhead = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // one page contains at least MIN_ROWS_PER_PAGE rows
  *ps = DEFAULT_INTERN_BUF_PAGE_SIZE;
  while ((*ps) - overhead < ((*rowsize) * MIN_ROWS_PER_PAGE)) {
    *ps = (*ps << 1u);
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2492
// a column type supports statistics-based pre-filtering unless it is binary or nchar
#define IS_PREFILTER_TYPE(_t) (!((_t) == TSDB_DATA_TYPE_BINARY || (_t) == TSDB_DATA_TYPE_NCHAR))
2493

H
Haojun Liao 已提交
2494 2495 2496 2497
/*
 * Use the per-column block statistics to decide whether the rows of the block must be loaded.
 *
 * Returns true when
 *  - no statistics are available, or there is neither a column filter nor a top/bottom function,
 *  - a filtered column has no statistics entry or is of binary/nchar type,
 *  - a filtered column is entirely NULL and one of its filters is isNull_filter,
 *  - any filter accepts the [min, max] range of its column,
 *  - for a top/bottom query, topbot_datablock_filter() accepts the block.
 * Returns false otherwise, i.e. the block can be skipped.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery)) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    // locate the statistics entry of the filtered column
    SDataStatis *pColStatis = NULL;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pFilterInfo->info.colId) {
        pColStatis = &pDataStatis[c];
        break;
      }
    }

    // no statistics data, load the true data block
    if (pColStatis == NULL) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
      return true;
    }

    // all data in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        if (pFilterInfo->pFilters[j].fp == isNull_filter) {
          return true;
        }
      }

      continue;
    }

    // float columns are compared as float, so min/max are narrowed before they are handed to the filters
    float minval = 0;
    float maxval = 0;
    char *pMin = (char *)&pColStatis->min;
    char *pMax = (char *)&pColStatis->max;

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
      minval = (float)(*(double *)(&pColStatis->min));
      maxval = (float)(*(double *)(&pColStatis->max));
      pMin = (char *)&minval;
      pMax = (char *)&maxval;
    }

    for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
      SColumnFilterElem *pElem = &pFilterInfo->pFilters[i];
      if (pElem->fp(pElem, pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pExpr1[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        // NOTE(review): pDataStatis is indexed by the output expression index here, while the filter
        // loop above looks entries up by column id -- confirm both orderings agree.
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2568 2569 2570 2571 2572 2573 2574 2575
/*
 * Tell whether the data block straddles a time window boundary, i.e. whether some window
 * starts or ends strictly inside the block instead of covering it completely.
 *
 * The windows are enumerated in scan direction, starting from the window aligned on the block's
 * first key in that direction, until they move past the block.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  TSKEY blockSKey = pBlockInfo->window.skey;
  TSKEY blockEKey = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockSKey, sk, ek, &w);
    assert(w.ekey >= blockSKey);

    // the first window ends inside the block
    if (w.ekey < blockEKey) {
      return true;
    }

    for (;;) {
      getNextTimeWindow(pQuery, &w);
      if (w.skey > blockEKey) {
        return false;
      }

      assert(w.ekey > blockEKey);
      if (w.skey <= blockEKey && w.skey > blockSKey) {
        return true;
      }
    }
  }

  // descending order: start from the window aligned on the last key of the block
  getAlignQueryTimeWindow(pQuery, blockEKey, sk, ek, &w);
  assert(w.skey <= blockEKey);

  // the first window starts inside the block
  if (w.skey > blockSKey) {
    return true;
  }

  for (;;) {
    getNextTimeWindow(pQuery, &w);
    if (w.ekey < blockSKey) {
      return false;
    }

    assert(w.skey < blockSKey);
    if (w.ekey < blockEKey && w.ekey >= blockSKey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2617
/*
 * Decide how much of the current data block is required by the query and load exactly that.
 *
 * *status is set to
 *   BLK_DATA_NO_NEEDED     - nothing is loaded,
 *   BLK_DATA_STATIS_NEEDED - the block statistics are requested into *pStatis; when none exist the
 *                            rows are loaded into *pDataBlock instead,
 *   BLK_DATA_ALL_NEEDED    - statistics are requested and the rows are loaded into *pDataBlock,
 *   BLK_DATA_DISCARD       - needToLoadDataBlock() rejected the block; the rows are not loaded.
 *
 * Returns TSDB_CODE_SUCCESS, or terrno when tsdbRetrieveDataBlock() returns NULL.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  *status = BLK_DATA_NO_NEEDED;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo* pCost = &pRuntimeEnv->summary;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        SResultRow* pResult = NULL;

        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;
        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow, &pResult) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // combine the requirement of every output function; stop once all the data is required
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pExpr1[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pCost->discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);
    pCost->loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      if (*pDataBlock == NULL) {
        // same failure handling as the full-load path below
        return terrno;
      }

      pCost->totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pCost->loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pCost->discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;

      // the block is skipped by the caller, so do not read its rows from storage
      return TSDB_CODE_SUCCESS;
    }

    pCost->totalCheckedRows += pBlockInfo->rows;
    pCost->loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2703
/*
 * Binary search in an ascending array of `num` timestamps stored at pValue.
 *
 * order == TSDB_ORDER_DESC: returns the index of the last element that is <= key
 *                           (firstPos - 1, possibly -1, when every remaining element is larger).
 * order == TSDB_ORDER_ASC:  returns the index of the first element that is >= key,
 *                           or -1 when key is larger than every element.
 * Returns -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keyList = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keyList[hi]) return hi;
      if (key == keyList[lo]) return lo;
      if (key < keyList[lo]) return lo - 1;

      int32_t span = hi - lo + 1;
      int32_t mid = (span >> 1) + lo;

      if (key < keyList[mid]) {
        hi = mid - 1;
      } else if (key > keyList[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keyList[lo]) return lo;
    if (key == keyList[hi]) return hi;

    if (key > keyList[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    int32_t span = hi - lo + 1;
    int32_t mid = (span >> 1u) + lo;

    if (key < keyList[mid]) {
      hi = mid - 1;
    } else if (key > keyList[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2766 2767 2768 2769 2770 2771 2772 2773
/*
 * Grow every output column buffer so that it can hold `capacity` rows.
 *
 * Nothing is done when `capacity` is smaller than the current capacity. On allocation failure
 * the function does not return: it longjmp()s to pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 * After a successful resize each pCtx[i].aOutputBuf points at the start of its buffer.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity may not fit into int32_t
    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2794
// TODO merge with ensureOutputBufferSimple
2795 2796 2797
/*
 * In case of prj/diff style queries, make sure the output buffers can accommodate all rows of the
 * current block.
 *
 * Interval queries, group-by-normal-column queries, fixed-output queries and ts_comp queries are
 * left untouched. For the others, when the free room is smaller than pBlockInfo->rows, every
 * column buffer is grown by exactly the missing number of rows, the newly usable area is zeroed,
 * and the pCtx output pointers are moved to the first free row.
 *
 * On allocation failure the function does not return: it longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) || isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pQuery->rec.capacity - pQuery->rec.rows >= pBlockInfo->rows) {
    return;  // enough room already
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // compute the size in size_t: bytes * newSize may not fit into int32_t
    size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    } else {
      // clear everything behind the rows that already hold results
      memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions write their timestamps into the buffer of the first output column
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2834 2835 2836 2837 2838
/*
 * For an interval query whose window info has not been initialized yet
 * (prevSKey == TSKEY_INITIAL_VAL), derive the first time window from the first data block and
 * store its start key in windowResInfo.prevSKey / startTime.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  if (pRuntimeEnv->windowResInfo.prevSKey != TSKEY_INITIAL_VAL) {
    return;  // already initialized
  }

  STimeWindow     w = TSWINDOW_INITIALIZER;
  SResultRowInfo *pInfo = &pRuntimeEnv->windowResInfo;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &w);

    pInfo->startTime = w.skey;
    pInfo->prevSKey = w.skey;
    return;
  }

  // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &w);

  pInfo->startTime = pQuery->window.skey;
  pInfo->prevSKey = w.skey;
}

2855 2856
/*
 * Iterate over the data blocks of the current table and apply the query functions on each block.
 *
 * The primary query handle is used during the master scan, the secondary handle otherwise.
 * The loop stops when the blocks are exhausted, loading a block fails, or the query status
 * becomes QUERY_RESBUF_FULL / QUERY_COMPLETED. A cancelled query or a non-zero terrno leaves the
 * function through longjmp() to pRuntimeEnv->env. Always returns 0 otherwise.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQueryHandle, &blockInfo, &pStatis, &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // skip the whole block: continue right after its last key in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : blockInfo.rows - 1;
    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  bool closableScan = IS_MASTER_SCAN(pRuntimeEnv) || pRuntimeEnv->scanFlag == REPEAT_SCAN;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) && closableScan) {
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      closeAllResultRows(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    } else {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2935
/*
 * Refresh the tag variant `tag` with the value of column `tagColId` of `pTable`.
 *
 * The previous content of `tag` is always destroyed first. For the table-name pseudo
 * column the table name is stored as a binary value; for every other column the tag
 * value is fetched from the table and, if it is missing or NULL, the variant is only
 * marked as TSDB_DATA_TYPE_NULL. `tsdb` is not used by this function.
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  // table name pseudo column: the name is a var-length value
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char* pName = tsdbGetTableName(pTable);
    assert(pName != NULL);

    tVariantCreateFromBinary(tag, varDataVal(pName), varDataLen(pName), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char* pTagVal = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
  if (pTagVal == NULL || isNull(pTagVal, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  bool isVarLenType = (type == TSDB_DATA_TYPE_BINARY) || (type == TSDB_DATA_TYPE_NCHAR);
  if (isVarLenType) {
    // var-length value: payload and length are taken from the var-data header
    tVariantCreateFromBinary(tag, varDataVal(pTagVal), varDataLen(pTagVal), type);
  } else {
    // fixed-length value: copy `bytes` bytes as they are
    tVariantCreateFromBinary(tag, pTagVal, bytes, type);
  }
}

2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979
/*
 * Linear search of the tag column list for the entry whose colId equals `colId`.
 * Returns a pointer into pTagColList, or NULL when no entry matches.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo* pEnd = pTagColList + numOfTags;
  for (SColumnInfo* pCol = pTagColList; pCol < pEnd; ++pCol) {
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2980
/*
 * Load the tag values of `pTable` into the tag variants of the query function contexts.
 *
 * - If the query has a single TSDB_FUNC_TS_COMP output, only the tag column identified by
 *   the first argument of that expression is loaded, into pCtx[0].tag.
 * - Otherwise every output expression whose column is flagged as a tag gets its tag value
 *   loaded into the matching context. In addition, when a ts buffer is attached
 *   (pTsBuf != NULL) and the first expression is a TS/PRJ function on the primary timestamp
 *   column, the join tag identified by its first argument is loaded into pCtx[0].tag.
 *
 * The tag column referenced by the expression argument must exist in pQuery->tagColList;
 * this is asserted, because doGetTagColumnInfoById() returns NULL for an unknown column id
 * and the result is dereferenced right away.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pExprInfo = &pQuery->pExpr1[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
    assert(pColInfo != NULL);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
  } else {
    // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo* pLocalExprInfo = &pQuery->pExpr1[idx];

      // ts_comp column required the tag value for join filter
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        continue;
      }

      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }

    // set the join tag for first column
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTsBuf != NULL &&
        pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      assert(pFuncMsg->numOfParams == 1);

      int16_t      tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
      SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
      assert(pColInfo != NULL);

      doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);

      // the log format depends on the type that was actually stored in the variant
      int16_t tagType = pRuntimeEnv->pCtx[0].tag.nType;
      if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
        qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
               pExprInfo->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz);
      } else {
        qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
               pExprInfo->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64Key);
      }
    }
  }
}

H
Haojun Liao 已提交
3030
/*
 * Feed one result row (`pWindowRes`) into the merge stage of every output function.
 *
 * mergeFlag == false starts a new output row: each context's output buffer is advanced by
 * one element, the stage is set to FIRST_STAGE_MERGE and the function is re-initialized.
 * mergeFlag == true keeps accumulating into the current output row.
 *
 * The work is done in two passes: the first prepares all contexts (input position and tag
 * value), the second invokes distMergeFunc for every function except TSDB_FUNC_TAG_DUMMY.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SResultRow *pWindowRes, bool mergeFlag) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage  = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  // pass 1: set up every function context
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pExpr1[col].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf   = pc->aOutputBuf + pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[funcId].init(pc);
    }

    pc->hasNull              = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf        = getPosInResultPage(pRuntimeEnv, col, pWindowRes, pPage);

    bool isTagFunc = (funcId == TSDB_FUNC_TAG_DUMMY) || (funcId == TSDB_FUNC_TAG);
    if (!isTagFunc) {
      continue;
    }

    // for tag columns the tag value is rebuilt from the input buffer
    tVariantDestroy(&pc->tag);

    int32_t outType = pc->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // pass 2: run the merge function of every output except the dummy tag
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pExpr1[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pRuntimeEnv->pCtx[col]);
    }
  }
}

3074
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

3143
/*
 * Debug helper: dump `numOfRows` rows of the intermediate result to stdout, one line per
 * row and one tab-terminated field per output column. pdata[i] holds the values of output
 * column i; row j of that column is read at offset pExpr1[i].bytes * j.
 * Columns whose type is not handled below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pExpr1[col];

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          // binary cells hold a function-specific intermediate layout
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, pdata[col]->data + pExpr->bytes * row, type);
          break;
        }

        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)(pdata[col]->data + pExpr->bytes * row));
          break;

        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)(pdata[col]->data + pExpr->bytes * row));
          break;

        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)(pdata[col]->data + pExpr->bytes * row));
          break;

        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)(pdata[col]->data + pExpr->bytes * row));
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
3178 3179 3180
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
3181 3182 3183 3184 3185
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
3186

3187 3188
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
3189

3190 3191
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
3192

3193 3194 3195 3196
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
3197

3198 3199 3200 3201
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
3202

H
Haojun Liao 已提交
3203
  SResultRowInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
3204
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
3205
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId);
3206

H
Haojun Liao 已提交
3207
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
3208
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
3209

H
Haojun Liao 已提交
3210
  SResultRowInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
3211
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
3212
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId);
3213

H
Haojun Liao 已提交
3214
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
3215
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
3216

3217 3218 3219
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
3220

3221 3222 3223
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

3224
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group yields at least one
 * data page or all groups are consumed. groupIndex is advanced past every merged group.
 *
 * Returns -1 as soon as merging a group fails, TSDB_CODE_SUCCESS otherwise. The elapsed
 * time is added to summary.firstStageMergeTime on the success path only.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    if (numOfPages > 0) {  // this group generates at least one result, return results
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  // every group is merged and every page of the last one is consumed: the query is over
  SGroupResInfo* pResInfo = &pQInfo->groupResInfo;
  bool allGroupsMerged = (pQInfo->groupIndex == numOfGroups);
  if (allGroupsMerged && pResInfo->pageId == pResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the disk-based result buffer into the output buffers
 * pQuery->sdata[], up to pQuery->rec.capacity rows, and set pQuery->rec.rows.
 *
 * The read position is kept in pQInfo->groupResInfo (pageId/rowId). When all pages of the
 * current group have been returned, the next group is merged first; if no group is left
 * the query is flagged as over and nothing is copied.
 *
 * Fix over the previous version: when a page is only partially consumed (rowId > 0), the
 * copy now starts at that row and copies only the rows that remain in the page. Before,
 * the copy always started at the first row of the page and the "whole page" branch copied
 * pData->num rows, which repeated already-returned rows and could write past the output
 * buffer. Pages read from their first row behave exactly as before.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;

  // TODO add API for release none-dirty pages
  for (int32_t j = pGroupResInfo->pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->rowId < pData->num);

    // remember where to start in this page before the read position is advanced
    int32_t startRow = (int32_t)pGroupResInfo->rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer gets full: take what fits and stay on this page
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->rowId += numOfCopiedRows;
      done = true;
    } else {
      // the rest of this page fits: take it and move on to the next page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pageId += 1;
      pGroupResInfo->rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // each column starts at offset[i] * numOfRowsPerPage inside the page
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + startRow * bytes;
      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

3346 3347 3348
/*
 * Return the number of results held by `pResultRow`: the numOfRes of the first output
 * function that has any result, or 0 if none has.
 *
 * ts, tag and tagprj functions are skipped because they cannot decide the number of
 * output rows; that number is decided by the main output functions.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pExpr1[col].base.functionId;

    bool isAuxiliary = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TAG) || (funcId == TSDB_FUNC_TAGPRJ);
    if (!isAuxiliary) {
      SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResultRow, col);
      assert(pCell != NULL);

      if (pCell->numOfRes > 0) {
        return pCell->numOfRes;
      }
    }
  }

  return 0;
}

3371
/*
 * Merge the per-table results of one table group into the group result.
 *
 * Only tables that own result pages and at least one result row take part.
 *  - no such table:   returns 0.
 *  - exactly one:     nothing is merged; groupResInfo is pointed at that table's pages and
 *                     the number of those pages is returned.
 *  - several:         the tables are merged by timestamp with a loser tree. Rows with the
 *                     same timestamp are merged into one output row; the output is
 *                     collected in pQuery->sdata and flushed to the result buffer whenever
 *                     it reaches pQuery->rec.capacity rows.
 *
 * Returns the number of data pages of the group result, or -1 when flushing fails.
 * Allocation failure and a killed query leave through longjmp(pRuntimeEnv->env, ...).
 *
 * Fixes over the previous version: the temporary arrays and the loser tree are released on
 * the flush-failure path inside the merge loop (they leaked), and a loser tree that could
 * not be created is reported as out-of-memory instead of being dereferenced.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  // posList[i]: index of the current result row of merge source i, zero-initialized
  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that really have results
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    tfree(posList);
    tfree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {  // the tree is dereferenced below, so treat a missing tree as OOM
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  SResultRow* pRow = getNewResultRow(pRuntimeEnv->pool);
  resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);

  pQInfo->groupResInfo.groupId = getMergeResultGroupId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (isQueryKilled(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      tfree(pTableList);
      tfree(posList);
      tfree(pTree);
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the winner of the loser tree is the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SResultRowInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SResultRow  *pWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num <= 0) {
      // empty result row: just step over it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            // release everything acquired above before giving up
            tfree(pTree);
            tfree(pTableList);
            tfree(posList);
            return -1;
          }

          resetMergeResultBuf(pRuntimeEnv, pRuntimeEnv->pCtx, pRow);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SResultRow  *pNextWindowRes = getResultRow(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      tfree(pTree);
      tfree(pTableList);
      tfree(posList);
      return -1;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  tfree(pTableList);
  tfree(posList);
  tfree(pTree);

  return pQInfo->groupResInfo.numOfDataPages;
}

H
Haojun Liao 已提交
3540 3541
/*
 * Move the rows collected in pQuery->sdata[] (sdata[0]->num of them) into new pages of the
 * disk-based result buffer that belong to group pGroupResInfo->groupId.
 *
 * At most pResultBuf->numOfRowsPerPage rows go into one page; inside a page, column i
 * starts at offset[i] * numOfRowsPerPage. pGroupResInfo->numOfDataPages is incremented
 * once per page written.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when no new page could be
 * obtained. Callers only test the result against TSDB_CODE_SUCCESS.
 * NOTE(review): assumes getNewDataBuf() reports failure by returning NULL - confirm.
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000.
  int32_t pageId = -1;
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  int32_t remain = (int32_t) pQuery->sdata[0]->num;
  int32_t offset = 0;  // rows already written to pages

  while (remain > 0) {
    int32_t rows = (remain > capacity)? capacity:remain;
    assert(rows > 0);

    // get the output buffer page
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    if (buf == NULL) {  // previously dereferenced unchecked
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = rows;

    // pagewisely copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      char* output = buf->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage;
      char* src = ((char *) pQuery->sdata[i]->data) + offset * bytes;
      memcpy(output, src, (size_t)(buf->num * bytes));
    }

    offset += rows;
    remain -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3578 3579 3580
/*
 * Rewind the merge output: every function context is bound to its result cell in `pRow`
 * and every output buffer pQuery->sdata[k] is marked empty.
 *
 * aOutputBuf is deliberately set one element BEFORE the start of the output data.
 */
void resetMergeResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx *pCtx, SResultRow *pRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];

    pc->aOutputBuf  = pQuery->sdata[col]->data - pc->outputBytes;
    pc->size        = 1;
    pc->startOffset = 0;
    pc->resultInfo  = getResultCell(pRuntimeEnv, pRow, col);

    pQuery->sdata[col]->num = 0;
  }
}

3591 3592 3593 3594
/*
 * Turn the scan state of one table around for the reverse scan. A NULL table info is
 * ignored. The query order has already been switched when this is called.
 *
 * If the previous scan moved lastKey away from win.skey, the window end is first set to
 * lastKey + step. Then the window bounds are swapped, lastKey restarts at the new window
 * start, the cursor order is switched and its vgroup index reset, and the result-row
 * cursor is put on the last result row.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // lastKey == win.skey means no results; otherwise results may still not have been generated
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

H
Haojun Liao 已提交
3614
/*
 * For every closed result row of `pWindowResInfo`, decide which output functions still
 * take input during the reverse scan:
 *  - first/first_dst with ascending order and last/last_dst with descending order are
 *    re-opened (complete = false);
 *  - every other function except ts and tag is closed (complete = true);
 *  - ts and tag keep their current state.
 * Rows that are not closed are left untouched.
 */
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t rowIdx = 0; rowIdx < pWindowResInfo->size; ++rowIdx) {
    bool isClosed = getResultRowStatus(pWindowResInfo, rowIdx);
    if (isClosed) {
      SResultRow *pResultRow = getResultRow(pWindowResInfo, rowIdx);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t             funcId = pQuery->pExpr1[col].base.functionId;
        SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pResultRow, col);

        bool isFirst = (funcId == TSDB_FUNC_FIRST) || (funcId == TSDB_FUNC_FIRST_DST);
        bool isLast  = (funcId == TSDB_FUNC_LAST) || (funcId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pCell->complete = false;
        } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
          pCell->complete = true;
        }
      }
    }
  }
}

3640 3641
/*
 * Prepare the output functions of the query for the reverse scan.
 *
 * Group-by-normal-column and interval queries keep their results in the window result
 * list, which is handled by disableFuncInReverseScanImpl(). For all other queries the
 * result info attached to each function context is updated directly, with the same rule:
 * first/first_dst (ascending) and last/last_dst (descending) are re-opened, everything
 * else except ts and tag is marked complete.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pRuntimeEnv, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // for simple result of table query
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
    int32_t funcId = pQuery->pExpr1[col].base.functionId;

    SResultRowCellInfo *pCell = pRuntimeEnv->pCtx[col].resultInfo;
    if (pCell == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST) || (funcId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (funcId == TSDB_FUNC_LAST) || (funcId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pCell->complete = false;
    } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
      pCell->complete = true;
    }
  }
}

/*
 * Turn the query range of every table of every group around for the reverse scan and copy
 * the resulting lastKey into the matching entry of tableGroupInfo.pGroupList.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t groupIdx = 0; groupIdx < numOfGroups; ++groupIdx) {
    SArray *pTableGroup = GET_TABLEGROUP(pQInfo, groupIdx);
    SArray *pKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, groupIdx);

    size_t numOfTables = taosArrayGetSize(pTableGroup);
    for (int32_t tableIdx = 0; tableIdx < numOfTables; ++tableIdx) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTableGroup, tableIdx);
      updateTableQueryInfoForReverseScan(pQuery, pTableInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyGroup, tableIdx);
      pKeyInfo->lastKey = pTableInfo->lastKey;

      // both lists must describe the same table at the same position
      assert(pTableInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3691
// Apply SWITCH_ORDER to the order field of every output function context of the query.
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[idx];
    SWITCH_ORDER(pCtx->order);
    idx += 1;
  }
}

3698
/*
 * Initialize a result row: the cell info array is taken from the memory that directly follows the
 * SResultRow header, and both the page id and the row id are set to -1.
 *
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t initResultRow(SResultRow *pResultRow) {
  char *pCells = (char*)pResultRow + sizeof(SResultRow);

  pResultRow->rowId     = -1;
  pResultRow->pageId    = -1;
  pResultRow->pCellInfo = (SResultRowCellInfo*)pCells;

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3705
/*
 * Rebind every function context to the head of the query output buffers (pQuery->sdata) and to the result
 * cells of the result row obtained for the constant key 0. The cells are reset, the output buffers are
 * zeroed, and initCtxOutputBuf() is invoked at the end.
 */
void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t     defaultKey = 0;
  int64_t     uid = getResultInfoUId(pRuntimeEnv);
  SResultRow *pDefaultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&defaultKey,
                                                      sizeof(defaultKey), true, uid);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    // results are written from the first row of the i-th output column again
    pCtx->aOutputBuf = pQuery->sdata[i]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pDefaultRow, i);
    RESET_RESULT_INFO(pCtx->resultInfo);

    // top/bottom/diff write their timestamps into the output buffer of the first context
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    bool    needTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[i]->data, 0, (size_t)(pQuery->pExpr1[i].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of the function contexts forward by `output` rows and reset the result info of
 * every context. TSDB_FUNC_DIFF must not appear among the output functions (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];

    int32_t functionId = pQuery->pExpr1[j].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // only functions flagged as outer-forward get their output position advanced
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the values of the first output column (timestamp) are assigned inside the
     * top/bottom routine through ptsOutputBuf, so that buffer has to be forwarded as well.
     * The diff function is handled in the multi-output function.
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and run the init routine of the corresponding
 * function for those contexts whose result info has not been initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
    int32_t         functionId = pQuery->pExpr1[j].base.functionId;

    pCtx->currentStage = 0;

    SResultRowCellInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3780
/*
 * Apply the remaining query offset (pQuery->limit.offset) to the rows currently held in the output buffers.
 *
 * If the offset covers all current rows, the rows are dropped and the offset is reduced accordingly;
 * otherwise the first `offset` rows are removed by moving the remaining rows to the head of every output
 * column, and the offset becomes 0.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // case 1: every row of this batch is consumed by the offset
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // case 2: only the leading part of this batch is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    int32_t bytes = pCtx->outputBytes;
    char   *pData = (char*)pQuery->sdata[i]->data;

    // source and destination may overlap, hence memmove
    memmove(pData, pData + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pCtx->aOutputBuf = pData + pQuery->rec.rows * bytes;

    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update the status of the query. QUERY_NOT_COMPLETED replaces the whole status; any other status value is
 * OR-ed into the current status after the QUERY_NOT_COMPLETED flag has been cleared, since
 * QUERY_NOT_COMPLETED is not compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Invoke xNextStep of every output function, except TSDB_FUNC_TS, on its currently bound context.
 * Returns true if the result info of at least one of those contexts is not complete afterwards.
 */
static bool doNextStepAndCheckIncomplete(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool incomplete = false;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int16_t functId = pQuery->pExpr1[j].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
    aAggs[functId].xNextStep(pCtx);

    SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
    incomplete |= (!pResInfo->complete);
  }

  return incomplete;
}

/*
 * Check whether the data blocks have to be scanned once more.
 *
 * For group-by-normal-column and interval queries, the next-step routine of every function is invoked for
 * each closed result row; otherwise it is invoked once on the current contexts. The return value is true
 * when any involved result info is still incomplete.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    return doNextStepAndCheckIncomplete(pRuntimeEnv);
  }

  bool toContinue = false;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SResultRow *pResult = getResultRow(pWindowResInfo, i);
    if (pResult->closed) {
      // bind the contexts to this result row before stepping the functions
      setResultOutputBuf(pRuntimeEnv, pResult);
      toContinue |= doNextStepAndCheckIncomplete(pRuntimeEnv);
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3876
/*
 * Take a snapshot of the current query state: query status, current window index, the query time window and
 * the time window of the current table. `start` is recorded as lastKey and as the start key of curWindow.
 *
 * `start` must not be beyond the lastKey of the current table with respect to the scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pTableQueryInfo->lastKey);
  } else {
    assert(start >= pTableQueryInfo->lastKey);
  }

  SQueryStatusInfo info = {
      .lastKey     = start,
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
  };

  TIME_WINDOW_COPY(info.w, pQuery->window);
  TIME_WINDOW_COPY(info.curWindow, pTableQueryInfo->win);

  // the saved current window always begins at the given start key
  info.curWindow.skey = start;

  return info;
}

3897 3898 3899 3900
/*
 * Switch the runtime environment into reverse scan mode.
 *
 * The ts buffer cursor is saved in pStatus, the query window and scan order are reversed, the function
 * contexts and per-table ranges are adjusted, and a new secondary tsdb query handle is created for the
 * reversed window. If the handle cannot be created, control is transferred via longjmp with terrno.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
  if (pRuntimeEnv->pTsBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order);
    bool hasNext = tsBufNextPos(pRuntimeEnv->pTsBuf);
    assert(hasNext);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch, the window has to be consistent with the new scan direction
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3939 3940
/*
 * Undo the changes made for the reverse scan: the scan order of the query and of the function contexts is
 * switched back, the ts buffer cursor saved in pStatus is restored, the master scan flag is set, and the
 * lastKey, query status and time windows are restored from pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTsBuf, &pStatus->cur);
  if (pRuntimeEnv->pTsBuf) {
    pRuntimeEnv->pTsBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the saved position and status of the master scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // restore the time window of the table, and use it as the query window as well
  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3961 3962 3963 3964 3965 3966 3967
/*
 * Set the lastKey of the only table in the table group info to the start key of the query condition window.
 * Exactly one table is expected in pTableGroupInfo (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3968
/*
 * Scan the data blocks of the current table, starting from `start`.
 *
 * The master scan is executed first. As long as needScanDataBlocksAgain() reports that some function needs
 * more data, the scanned range is scanned again (REPEAT_SCAN) with a newly created secondary query handle.
 * Finally, if needReverseScan() is true, one reverse scan is executed and the environment is restored.
 *
 * Control is transferred via longjmp if a query handle cannot be created or the query has been killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;

      if (qstatus.lastKey == pCurrent->lastKey) {
        // the lastkey does not increase, which means no data checked yet
        qDebug("QInfo:%p no results generated in this scan", pQInfo);
      } else {
        // the scanned range ends right before the position where the master scan stopped
        qstatus.curWindow.ekey = pCurrent->lastKey - step;
      }

      qstatus.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    // a repeat scan is required: replace the secondary query handle with one for the scanned window
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    STsdbQueryCond cond = createTsdbQueryCond(pQuery, &qstatus.curWindow);
    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
4043
/*
 * Invoke the finalize routine of every output function.
 *
 * For group-by-normal-column and interval queries this is done for each closed result row (for group by
 * normal column, all result rows are closed first), and the number of rows of that result row is updated.
 * For other queries the finalize routines are invoked once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pExpr1[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    return;
  }

  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllResultRows(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SResultRow *pRow = pWindowResInfo->pResult[i];
    if (!isResultRowClosed(pWindowResInfo, i)) {
      continue;
    }

    // bind the function contexts to this result row, then finalize each column of it
    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functionId = pQuery->pExpr1[j].base.functionId;
      aAggs[functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1
     * except the top and bottom query
     */
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Returns true if at least one output expression of the query is a function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;

    bool isAuxOutput =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    if (!isAuxOutput) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
4091
/*
 * Set up a STableQueryInfo inside the caller-provided memory `buf` for the given table and time window.
 * lastKey starts at win.skey and the ts cursor vgroup index is set to -1.
 *
 * For interval or group-by-normal-column queries the windowResInfo is initialized as well; in other
 * aggregate queries it is left untouched.
 *
 * Returns `buf` as STableQueryInfo, or NULL if the windowResInfo could not be initialized.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STableQueryInfo *pInfo = buf;
  pInfo->pTable  = pTable;
  pInfo->win     = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  bool needWindowRes = QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol;
  if (!needWindowRes) {
    return pInfo;
  }

  // set more initial size of interval/groupby query
  int32_t initialSize = 128;
  if (initResultRowInfo(&pInfo->windowResInfo, initialSize, TSDB_DATA_TYPE_INT) != TSDB_CODE_SUCCESS) {
    return NULL;
  }

  return pInfo;
}

H
Haojun Liao 已提交
4115
/*
 * Release the tag variant and the window result info held by the table query info.
 * A NULL argument is accepted and ignored. The STableQueryInfo object itself is not freed here.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupResultRowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
4127
 * @param pDataBlockInfo
4128
 */
H
Haojun Liao 已提交
4129
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
4130
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
4131
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
H
Haojun Liao 已提交
4132
  SResultRowInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
Haojun Liao 已提交
4133

H
Haojun Liao 已提交
4134 4135
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
4136

H
Haojun Liao 已提交
4137
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
H
Haojun Liao 已提交
4138 4139
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
4140

H
Haojun Liao 已提交
4141 4142 4143
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
4144

H
Haojun Liao 已提交
4145
  uint64_t uid = getResultInfoUId(pRuntimeEnv);
H
Haojun Liao 已提交
4146
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
4147
      sizeof(groupIndex), true, uid);
H
Haojun Liao 已提交
4148
  if (pResultRow == NULL) {
4149 4150
    return;
  }
4151

4152 4153 4154 4155
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
4156 4157
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
4158 4159 4160 4161
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
4162

H
Haojun Liao 已提交
4163 4164
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
4165
  setResultOutputBuf(pRuntimeEnv, pResultRow);
4166 4167 4168
  initCtxOutputBuf(pRuntimeEnv);
}

4169
/*
 * Bind every function context to the given result row: the output buffer points to the position of the
 * column inside the result page of the row, and resultInfo points to the corresponding result cell.
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int32_t         functionId = pQuery->pExpr1[i].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, pPage);

    // top/bottom/diff use the output buffer of the first context as their timestamp output buffer
    bool needTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer,
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, i);
  }
}

H
Haojun Liao 已提交
4192
/*
 * Bind the function contexts to the given result row and initialize those that are not initialized yet.
 * Contexts whose result cell is already initialized and complete only get their resultInfo pointer updated.
 */
void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, i);
    pCtx->resultInfo = pCell;
    if (pCell->initialized && pCell->complete) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, pPage);
    pCtx->currentStage = 0;

    int32_t functionId = pCtx->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

4220
/*
 * Set the tag values for the given table and, if a ts buffer exists, position the ts buffer for this table.
 *
 * When the table has no saved cursor yet (cur.vgroupIndex == -1), the start position for its tag value is
 * looked up in the ts buffer and the resulting cursor is saved in pTableQueryInfo; otherwise the saved cursor
 * is restored into the ts buffer.
 *
 * NOTE(review): the lookup-failure path returns `false`, which has the same value (0) as the normal return,
 * so callers cannot tell the two apart by the return value -- confirm whether that is intended.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTsBuf == NULL) {
    return 0;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;
  bool      isStrTag = (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR);

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    // the cursor of this table has been kept before, just restore it
    tsBufSetCursor(pRuntimeEnv->pTsBuf, &pTableQueryInfo->cur);
  } else {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (isStrTag) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return false;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
  }

  if (isStrTag) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
4276
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
4277 4278
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
4279
  STableQueryInfo *pTableQueryInfo = pQuery->current;
4280

4281 4282 4283
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
4284
    pTableQueryInfo->win.skey = key;
4285
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
4286

4287 4288 4289 4290 4291
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
4292

4293 4294 4295 4296 4297 4298
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
4299
    STimeWindow     w = TSWINDOW_INITIALIZER;
H
Haojun Liao 已提交
4300
    SResultRowInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
4301

H
Haojun Liao 已提交
4302 4303
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
4304
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
4305
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
4306

4307 4308
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
4309
        assert(win.ekey == pQuery->window.ekey);
4310
      }
4311

4312
      pWindowResInfo->prevSKey = w.skey;
4313
    }
4314

4315
    pTableQueryInfo->queryRangeSet = 1;
4316
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
4317 4318 4319 4320
  }
}

// Returns true if any output function of the query has the TSDB_FUNCSTATE_NEED_TS flag in its status.
bool requireTimestamp(SQuery *pQuery) {
  bool needTs = false;

  for (int32_t i = 0; i < pQuery->numOfOutput && !needTs; i++) {
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    needTs = ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
  }

  return needTs;
}

/*
 * Decide whether the primary timestamp column of the data block has to be loaded:
 * 1. if the lastKey of the current table or the end key of the query window locates in this block, the
 *    timestamp column is needed to decide the precise position;
 * 2. if any output function requires the timestamp (see requireTimestamp), it is needed in any case.
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo* pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

H
Haojun Liao 已提交
4344
/*
 * Copy the rows of the closed result rows into the output buffers of the query (pQuery->sdata), in
 * ascending or descending order of the result rows, until all rows are copied or the output capacity
 * (pQuery->rec.capacity) is reached.
 *
 * The copy progress is kept in pQInfo->groupIndex (next result row) and pQInfo->groupResInfo.rowId (next
 * row inside the result row), so that a following call resumes from the position where this one stopped.
 *
 * Returns the number of rows copied by this call.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t      totalSet = numOfClosedResultRows(pResultInfo);
  SResultRow** result = pResultInfo->pResult;

  // asc order starts from the current group, desc order walks backwards from the mirrored position
  bool    asc = (orderType == TSDB_ORDER_ASC);
  int32_t step = asc ? 1 : -1;
  int32_t startIdx = asc ? pQInfo->groupIndex : (totalSet - pQInfo->groupIndex - 1);

  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  int32_t numOfResult = 0;
  for (int32_t i = startIdx; (i < totalSet) && (i >= 0); i += step) {
    SResultRow *pRow = result[i];

    // empty result row, move on to the next one
    if (pRow->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->rowId = 0;
      continue;
    }

    int32_t oldOffset = pGroupResInfo->rowId;
    int32_t numOfRowsToCopy = pRow->numOfRows - pGroupResInfo->rowId;

    /*
     * current output space is not enough to accommodate all data of this page, only partial results
     * will be copied to SQuery object's result buffer
     */
    if (numOfRowsToCopy > pQuery->rec.capacity - numOfResult) {
      numOfRowsToCopy = (int32_t) pQuery->rec.capacity - numOfResult;
      pGroupResInfo->rowId += numOfRowsToCopy;
    } else {
      pGroupResInfo->rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pRow->pageId);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t size = pRuntimeEnv->pCtx[j].outputBytes;

      char *in = getPosInResultPage(pRuntimeEnv, j, pRow, page);
      char *out = pQuery->sdata[j]->data + numOfResult * size;
      memcpy(out, in + oldOffset * size, size * numOfRowsToCopy);
    }

    numOfResult += numOfRowsToCopy;
    if (numOfResult == pQuery->rec.capacity) {
      break;  // the output buffer is full
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, numOfResult);
#endif
  return numOfResult;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
H
Haojun Liao 已提交
4421
void copyFromWindowResToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo) {
4422
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4423

4424
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4425
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4426

4427
  pQuery->rec.rows += numOfResult;
4428

4429
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4430 4431
}

H
Haojun Liao 已提交
4432
/**
 * For non-interval queries, raise numOfRows of every result row in
 * pRuntimeEnv->windowResInfo to the largest numOfRes found among its output
 * cells. Cells produced by TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ
 * are not considered. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SResultRowInfo *pRowInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t row = 0; row < pRowInfo->size; ++row) {
    SResultRow *pRow = pRowInfo->pResult[row];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool ignored = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
      if (!ignored) {
        SResultRowCellInfo *pCellInfo = getResultCell(pRuntimeEnv, pRow, col);
        pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pCellInfo->numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4455
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4456
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4457
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4458
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4459

H
Haojun Liao 已提交
4460
  SResultRowInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4461
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4462

H
Haojun Liao 已提交
4463
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4464
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4465
  } else {
4466
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4467
  }
H
Haojun Liao 已提交
4468 4469 4470 4471 4472 4473

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    bool ascQuery = QUERY_IS_ASC_QUERY(pQuery);

    // TODO refactor
    if ((pTableQueryInfo->lastKey >= pTableQueryInfo->win.ekey && ascQuery) || (pTableQueryInfo->lastKey <= pTableQueryInfo->win.ekey && (!ascQuery))) {
H
Haojun Liao 已提交
4474
      closeAllResultRows(pWindowResInfo);
H
Haojun Liao 已提交
4475 4476 4477 4478 4479
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      updateResultRowCurrentIndex(pWindowResInfo, pTableQueryInfo->lastKey, ascQuery);
    }
  }
4480 4481
}

H
Haojun Liao 已提交
4482
/**
 * Tell whether a single-table query still has results to hand to the client.
 *
 * - Returns false once a positive LIMIT has been reached (rec.total >= limit).
 * - Fill queries (fill type set and not a point-interpolation query): true if
 *   the fill info still holds rows; otherwise, when the query is completed,
 *   true if getNumOfResWithFill() up to the end of the query window is > 0.
 *   Original note: while the query is not completed, the gap between two
 *   result blocks is handled after the next data block is retrieved; the gap
 *   in front of the FIRST result block is not filled.
 * - Other queries: true only if the query is completed, it is a group-by
 *   normal column or interval query, and there are window results left.
 *
 * @param pRuntimeEnv  query runtime environment
 * @return true if more results are pending, false otherwise
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFill = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // there are results waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // rows not returned yet must be filled and returned first
  int32_t pending = taosNumOfRemainRows(pFill);
  if (pending > 0) {
    return true;
  }

  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t total = (int32_t)getNumOfResWithFill(pFill, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return total > 0;
}

H
Haojun Liao 已提交
4523 4524 4525 4526
/**
 * Number of columns in the final result: numOfExpr2 when a second-stage
 * expression list (pExpr2) exists, numOfOutput otherwise.
 */
static int16_t getNumOfFinalResCol(SQuery* pQuery) {
  if (pQuery->pExpr2 != NULL) {
    return pQuery->numOfExpr2;
  }

  return pQuery->numOfOutput;
}

4527
/**
 * Serialize the current query result into the response payload.
 *
 * Layout written to `data`, in order:
 *   1. for every final output column, numOfRows values of that column
 *      (pExpr2 describes the columns when present, pExpr1 otherwise);
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer
 *      converted with htonl();
 *   3. one STableIdInfo per entry, uid/key converted with htobe64() and tid
 *      with htonl().
 *
 * Afterwards, if the query is in QUERY_COMPLETED state, it is moved to
 * QUERY_OVER when nothing is left to return.
 *
 * The write cursor advances by bytes * numOfRows per column, so after the
 * column section it is not guaranteed to be aligned for int32_t or
 * STableIdInfo. The trailer is therefore written with memcpy() instead of
 * through casted pointers.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy for every column
 * @param data       destination buffer; the caller must have sized it for the
 *                   whole payload (not checked here)
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // pick the expression list that describes the final output columns
  bool       secondStage = (pQuery->pExpr2 != NULL);
  SExprInfo *pExprs = secondStage ? pQuery->pExpr2 : pQuery->pExpr1;
  int32_t    numOfCols = secondStage ? pQuery->numOfExpr2 : pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfCols; ++col) {
    // widen before multiplying so that large results cannot overflow int32_t
    size_t len = (size_t)pExprs[col].bytes * (size_t)numOfRows;

    memmove(data, pQuery->sdata[col]->data, len);
    data += len;
  }

  // table id list header: entry count in network byte order
  int32_t  numOfTables = (int32_t) taosHashGetSize(pQInfo->arrTableIdInfo);
  uint32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  STableIdInfo* item = taosHashIterate(pQInfo->arrTableIdInfo, NULL);
  while (item) {
    // build the entry locally, then copy it out byte-wise
    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(item->uid);
    dst.tid = htonl(item->tid);
    dst.key = htobe64(item->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);

    item = taosHashIterate(pQInfo->arrTableIdInfo, item);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4575
/**
 * Generate filled result rows into pQuery->sdata and apply the remaining
 * OFFSET (pQuery->limit.offset) to them.
 *
 * Each round calls taosFillResultDataBlock() for up to rec.capacity rows:
 *  - offset == 0: the rows are returned as they are;
 *  - offset <  rows: the first `offset` rows are dropped by shifting the data
 *    of every output column in pDst to the front, offset is reset to 0 and
 *    the remaining count is returned;
 *  - otherwise the whole batch is discarded, offset is reduced by the batch
 *    size, rec.rows is reset, and another round is started as long as
 *    queryHasRemainResForTableQuery() reports more results (0 is returned
 *    when it does not).
 *
 * @param pRuntimeEnv  query runtime environment
 * @param pDst         per-column output pages that hold the generated rows
 * @param numOfFilled  not used by this function
 * @return number of rows available in the output after the offset is applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFill = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t rows = (int32_t)taosFillResultDataBlock(pFill, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset left: everything generated belongs to the result
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFill->numOfRows, rows);
      return rows;
    }

    if (pQuery->limit.offset < rows) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFill->numOfRows, rows, pQuery->limit.offset, rows - pQuery->limit.offset, 0);

      rows -= (int32_t)pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // shift the surviving rows of every column to the start of its page
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pExpr1[col].bytes * pQuery->limit.offset,
                rows * pQuery->pExpr1[col].bytes);
      }

      pQuery->limit.offset = 0;
      return rows;
    }

    // the whole batch is swallowed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFill->numOfRows, rows, pQuery->limit.offset, 0,
           pQuery->limit.offset - rows);

    pQuery->limit.offset -= rows;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return 0;
    }
  }
}

4619
/**
 * Finalize the cost summary of a query and write it to the debug log.
 *
 * Records the memory used by the result-row hash table plus the table-info
 * hash map, adds the first-stage merge time to the elapsed time, and records
 * the result-row pool memory size and number of allocated result rows.
 *
 * @param pQInfo  query handle whose runtimeEnv.summary is updated and logged
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pSummary = &pRuntimeEnv->summary;

  // memory held by the two hash tables of this query
  uint64_t rowHashMem = taosHashGetMemSize(pRuntimeEnv->pResultRowHashTable);
  uint64_t tableHashMem = taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pSummary->hashSize = rowHashMem + tableHashMem;

  // add the merge time
  pSummary->elapsedTime += pSummary->firstStageMergeTime;

  SResultRowPool *pPool = pRuntimeEnv->pool;
  pSummary->winInfoSize = getResultRowPoolMemSize(pPool);
  pSummary->numOfTimeWindows = getNumOfAllocatedResultRows(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->firstStageMergeTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pSummary->winInfoSize/1024.0,
         pSummary->numOfTimeWindows, pSummary->tableInfoSize/1024.0, pSummary->hashSize/1024.0);
}

4643 4644
/**
 * Consume the remaining OFFSET inside the given data block.
 *
 * If the offset equals the number of rows in the block, the whole block is
 * skipped: the current table's lastKey is moved one step past the block in
 * scan direction and the offset is cleared. Otherwise pQuery->pos is set to
 * the first row behind the offset, lastKey is set to that row's timestamp
 * (taken from column 0 of the retrieved block), the offset is cleared and the
 * query functions are applied to the block.
 *
 * @param pRuntimeEnv  query runtime environment
 * @param pBlockInfo   description of the block the query handle is positioned on
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTable = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  bool    asc = QUERY_IS_ASC_QUERY(pQuery);

  if (pQuery->limit.offset == pBlockInfo->rows) {  // current block will ignore completed
    if (asc) {
      pTable->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pTable->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  // first row behind the offset, counted from the scan start of the block
  pQuery->pos = asc ? (int32_t)pQuery->limit.offset
                    : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // resume from the row at pQuery->pos; the offset is now fully consumed
  pTable->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4678

4679 4680 4681 4682 4683
/**
 * Apply the OFFSET of a query by skipping whole data blocks.
 *
 * Nothing is done when there is no positive offset or when the query has
 * filter columns. Otherwise blocks are iterated: a block with fewer rows than
 * the remaining offset is skipped entirely (offset reduced, lastKey moved one
 * step past the block); the first block that can absorb the rest of the offset
 * is handed to updateOffsetVal() and iteration stops.
 *
 * Leaves via longjmp(pRuntimeEnv->env, ...) when the query has been killed or
 * when terrno is set after the iteration.
 *
 * @param pRuntimeEnv  query runtime environment
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pTable = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    if (pQuery->limit.offset <= blockInfo.rows) {
      // find the appropriated start position in current block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block lies in front of the offset position
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY blockEnd = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pTable->lastKey = blockEnd + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4717

H
Haojun Liao 已提交
4718 4719
/**
 * Position an interval query on the first time window to be returned after
 * the window offset has been consumed (limit.offset must already be 0).
 *
 * If the window following *win still overlaps the given block in scan
 * direction, the block is loaded, the next qualified window is located in it,
 * the query start (pTableQueryInfo->win.skey and pQuery->window.skey) is reset
 * to the timestamp of the first row of that window, and the query functions
 * are applied to the block while preserving windowResInfo.curIndex.
 * Otherwise only pQuery->window.skey and prevSKey are moved to the start of
 * the following window.
 *
 * @param pRuntimeEnv      query runtime environment
 * @param win              last skipped time window
 * @param pBlockInfo       description of the current data block
 * @param pTableQueryInfo  per-table query state to update
 * @return the new start key: the first row timestamp of the qualified window
 *         when the block was processed, the start of the next window otherwise
 */
static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  assert(pQuery->limit.offset == 0);
  STimeWindow tw = *win;
  getNextTimeWindow(pQuery, &tw);

  if ((tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {

    // load the data block and check data remaining in current data block
    // TODO optimize performance
    SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
    SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

    // restart from the skipped window and let the search find the next qualified one
    tw = *win;
    int32_t startPos =
        getNextQualifiedWindow(pRuntimeEnv, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
    assert(startPos >= 0);

    // set the abort info
    pQuery->pos = startPos;

    // reset the query start timestamp
    pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
    pQuery->window.skey = pTableQueryInfo->win.skey;
    TSKEY key = pTableQueryInfo->win.skey;

    pWindowResInfo->prevSKey = tw.skey;
    int32_t index = pRuntimeEnv->windowResInfo.curIndex;

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
    pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
           GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
           pQuery->current->lastKey);

    return key;
  }

  // the next window does not touch this block: only move the window start forward
  pQuery->window.skey = tw.skey;
  pWindowResInfo->prevSKey = tw.skey;

  return tw.skey;
}

H
Haojun Liao 已提交
4768
/**
 * Apply the OFFSET of an interval query by skipping whole time windows.
 *
 * *start is first seeded from the unclosed result rows of the runtime
 * windowResInfo (or from the current table's lastKey when all are closed).
 * The skip itself is bypassed when there is no positive offset, or when the
 * query uses filter columns, a ts buffer or fill info.
 *
 * Otherwise data blocks are iterated and limit.offset is decremented once per
 * time window passed. When it reaches zero, doSkipIntervalProcess() positions
 * the query and its return value is stored in *start.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) if terrno is set after the
 * block iteration.
 *
 * @param pRuntimeEnv  query runtime environment
 * @param start        out: key the caller should continue from
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SResultRowInfo  *pRowInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTable = pQuery->current;

  // get the first unclosed time window
  // NOTE(review): the loop does not stop at the first match, so *start ends up
  // holding the skey of the LAST unclosed row - confirm which one is intended.
  bool found = false;
  for (int32_t idx = 0; idx < pRowInfo->size; ++idx) {
    SResultRow *pRow = pRowInfo->pResult[idx];
    if (!pRow->closed) {
      found = true;
      *start = pRow->win.skey;
    }
  }

  if (!found) {
    *start = pQuery->current->lastKey;
  }

  assert(*start <= pQuery->current->lastKey);

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * For interval queries without interpolation, one window is forwarded at a
   * time for pQuery->limit.offset times. Since holes exist in the data,
   * pQuery->interval.interval * pQuery->limit.offset is not a valid shortcut.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pRowInfo->startTime = pQuery->window.skey;
      pRowInfo->prevSKey = aligned.skey;
    } else if (pRowInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending scan: align only once, on the first block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);
      pRowInfo->startTime = aligned.skey;
      pRowInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow curWin = getActiveTimeWindow(pRowInfo, pRowInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      STimeWindow nextWin = curWin;

      // a window that ends inside this block counts as one skipped window
      bool endsInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (curWin.ekey <= blockInfo.window.ekey)
                                                    : (curWin.ekey >= blockInfo.window.skey);
      if (endsInBlock) {
        pQuery->limit.offset -= 1;
        pRowInfo->prevSKey = curWin.skey;
      }

      // current window does not ended in current data block, try next data block
      getNextTimeWindow(pQuery, &nextWin);
      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &curWin, &blockInfo, pTable);
        return true;
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey <= blockInfo.window.ekey)
                                                    : (nextWin.ekey >= blockInfo.window.skey);
      if (!nextInBlock) {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      // the current window reaches beyond this block: count it here
      bool endsBeyondBlock = QUERY_IS_ASC_QUERY(pQuery) ? (curWin.ekey > blockInfo.window.ekey)
                                                        : (curWin.ekey < blockInfo.window.skey);
      if (endsBeyondBlock) {
        pQuery->limit.offset -= 1;
      }

      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &curWin, &blockInfo, pTable);
        return true;
      }

      nextWin = curWin;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &nextWin, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      pTable->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pRowInfo->prevSKey = nextWin.skey;
      curWin = nextWin;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4887 4888
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4889
/**
 * Create the TSDB query handle for this query and store it in
 * pRuntimeEnv->pQueryHandle.
 *
 * No handle is created (TSDB_CODE_SUCCESS is returned right away) for
 * tag-only queries, and for super-table queries that are neither interval
 * queries nor fixed-output queries.
 *
 * The handle type depends on the query: tsdbQueryLastRow() for first/last-row
 * queries, tsdbQueryRowsInExternalWindow() for point-interpolation queries and
 * tsdbQueryTables() otherwise. For first/last-row queries the query window is
 * replaced by the window in the condition and propagated to every table.
 *
 * @param tsdb           storage handle passed to the tsdbQuery* functions
 * @param pQInfo         query handle
 * @param isSTableQuery  true for a super-table query
 * @return terrno as left by the handle creation (reset to success beforehand)
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // plain ascending scan of exactly one table: use that table's own time window
  bool useTableWindow = !isSTableQuery
      && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
      && (cond.order == TSDB_ORDER_ASC)
      && (!QUERY_IS_INTERVAL_QUERY(pQuery))
      && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
      && (!isFixedOutputQuery(pRuntimeEnv));

  if (useTableWindow) {
    SArray          *firstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(firstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

    // update the query time window
    pQuery->window = cond.twindow;

    if (pQInfo->tableGroupInfo.numOfTables == 0) {
      pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    } else {
      // hand the updated window to every table of every group
      size_t groups = GET_NUM_OF_TABLEGROUP(pQInfo);
      for (int32_t g = 0; g < groups; ++g) {
        SArray *pTables = GET_TABLEGROUP(pQInfo, g);
        size_t  tables = taosArrayGetSize(pTables);

        for (int32_t k = 0; k < tables; ++k) {
          STableQueryInfo *pTable = taosArrayGetP(pTables, k);

          pTable->win = pQuery->window;
          pTable->lastKey = pTable->win.skey;
        }
      }
    }
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  }

  return terrno;
}

H
Haojun Liao 已提交
4947
/**
 * Build the per-column fill description for the final result columns.
 *
 * One SFillColInfo is produced per final result column (taken from pExpr2
 * when present, from pExpr1 otherwise). Column offsets are the running sum of
 * the preceding column widths, and the fill value comes from
 * pQuery->fillVal[i].
 *
 * @param pQuery  query description
 * @return array allocated with calloc() holding one entry per final result
 *         column, or NULL if the allocation fails
 */
static SFillColInfo* createFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = getNumOfFinalResCol(pQuery);

  SFillColInfo *pCols = calloc(numOfCols, sizeof(SFillColInfo));
  if (pCols == NULL) {
    return NULL;
  }

  // TODO refactor
  int32_t rowOffset = 0;
  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo *pExpr = (pQuery->pExpr2 == NULL) ? &pQuery->pExpr1[i] : &pQuery->pExpr2[i];
    SFillColInfo *pCol = &pCols[i];

    pCol->col.bytes = pExpr->bytes;
    pCol->col.type = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->tagIndex = -2;
    pCol->flag = TSDB_COL_NORMAL;  // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i = pQuery->fillVal[i];

    rowOffset += pExpr->bytes;
  }

  return pCols;
}

4974
/**
 * Initialize the runtime state of a query before execution.
 *
 * Steps, in order: derive query flags, create the TSDB query handle, store
 * the basic runtime fields, configure the ts buffer traverse order, create the
 * disk-based result buffer and the result-row info where the query type needs
 * them, set up the runtime environment, create the fill info for fill queries,
 * and finally mark the query as QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query handle to initialize
 * @param pTsBuf         optional ts buffer (may be NULL)
 * @param tsdb           storage handle
 * @param vgId           id stored in pQInfo->vgId
 * @param isSTableQuery  true for a super-table query
 * @return TSDB_CODE_SUCCESS, or the first error code reported by a setup step
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);
  pRuntimeEnv->timeWindowInterpo = timeWindowInterpoRequired(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTsBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse ascending when the ts buffer is stored in the query's order
    int16_t traverseOrder = TSDB_ORDER_DESC;
    if (pQuery->order.order == pRuntimeEnv->pTsBuf->tsOrder) {
      traverseOrder = TSDB_ORDER_ASC;
    }

    tsBufSetTraverseOrder(pRuntimeEnv->pTsBuf, traverseOrder);
  }

  int32_t pageSize = DEFAULT_PAGE_SIZE;
  int32_t rowSize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowSize);

  int32_t bufLimit = 1024*1024*10;  // 10 MB passed to the result buffer

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowSize, pageSize, bufLimit, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // key type: group-by column type (group by columns, not tags), else the group id
      int16_t keyType = TSDB_DATA_TYPE_INT;
      if (pRuntimeEnv->groupbyNormalCol) {
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      }

      code = initResultRowInfo(&pRuntimeEnv->windowResInfo, 8, keyType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery) || (!isSTableQuery)) {
    int32_t initialRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowSize);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowSize, pageSize, bufLimit, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // key type: group-by column type, else timestamp
    int16_t keyType = TSDB_DATA_TYPE_TIMESTAMP;
    if (pRuntimeEnv->groupbyNormalCol) {
      keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    }

    code = initResultRowInfo(&pRuntimeEnv->windowResInfo, initialRows, keyType);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // NOTE(review): createFillColInfo() returns NULL when its allocation fails and
    // the result is passed on unchecked - confirm taosInitFillInfo() tolerates NULL.
    SFillColInfo *pFillCols = createFillColInfo(pQuery);
    STimeWindow   aligned = TSWINDOW_INITIALIZER;

    TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &aligned);

    int32_t numOfCols = getNumOfFinalResCol(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, aligned.skey, 0, (int32_t)pQuery->rec.capacity, numOfCols,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pFillCols, pQInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

5074
/**
 * Clear the `complete` flag in the result info of every output function
 * context, skipping contexts that have no result info attached.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultRowCellInfo *pCellInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pCellInfo == NULL) {
      continue;
    }

    pCellInfo->complete = false;
  }
}

H
Haojun Liao 已提交
5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095
/**
 * Prepare the execution environment for one data block of one table.
 *
 * Interval queries: the interval query range is set from the block's start
 * key, and the additional per-table info is set when the query outputs tag
 * values or uses a ts buffer. Other queries: the execution context is set for
 * the table's group with the key one step past the block's end key.
 *
 * @param pQInfo           query handle
 * @param pTableQueryInfo  query state of the table owning the block
 * @param pBlockInfo       description of the block
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    bool needTableInfo = pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL;
    if (needTableInfo) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115
/*
 * Debug-only sanity check of a table's query window against the query window.
 *
 * Ascending query:  win.skey <= win.ekey, lastKey >= win.skey, and the table
 *                   window lies inside [query.skey, query.ekey].
 * Descending query: the same relations with every comparison reversed.
 *
 * Each invariant has its own assert so a failure names the violated relation.
 */
static void doTableQueryInfoTimeWindowCheck(SQuery* pQuery, STableQueryInfo* pTableQueryInfo) {
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey);
    assert(pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey);
    assert(pTableQueryInfo->win.skey >= pQuery->window.skey);
    assert(pTableQueryInfo->win.ekey <= pQuery->window.ekey);
    return;
  }

  // descending order
  assert(pTableQueryInfo->win.skey >= pTableQueryInfo->win.ekey);
  assert(pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey);
  assert(pTableQueryInfo->win.skey <= pQuery->window.skey);
  assert(pTableQueryInfo->win.ekey >= pQuery->window.ekey);
}

H
Haojun Liao 已提交
5116
/*
 * Iterate over all data blocks delivered by the active query handle (the
 * primary handle during the master scan, the secondary handle otherwise) and
 * apply the query functions on every block that is not discarded.
 *
 * The scan stops early when a block belongs to a table that is not found in
 * the table-query-info map, or when loading a block fails. A killed query, or
 * a non-success terrno after the loop, aborts through longjmp.
 *
 * @param pQInfo  query info object
 * @return        elapsed time of the scan, in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // locate the per-table query info by the table id of the current block
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    pQuery->current = *ppTableInfo;
    doTableQueryInfoTimeWindowCheck(pQuery, *ppTableInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *ppTableInfo, &blockInfo);
    }

    uint32_t     status = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pHandle, &blockInfo, &pStatis,
                                         &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped: move lastKey one step past the block boundary in scan direction
      TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = boundary + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startMs;
}

5181 5182
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5183
  SQuery *          pQuery = pRuntimeEnv->pQuery;
5184

5185
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
5186
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
5187
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
5188

H
Haojun Liao 已提交
5189
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
H
Haojun Liao 已提交
5190 5191
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
5192

H
Haojun Liao 已提交
5193
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
5194
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
5195
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
5196

5197
  STsdbQueryCond cond = {
5198
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
5199 5200
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
5201
      .numOfCols = pQuery->numOfCols,
5202
  };
5203

H
hjxilinx 已提交
5204
  // todo refactor
5205
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
5206 5207 5208 5209
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
5210

5211
  taosArrayPush(g1, &tx);
5212
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
5213

5214
  // include only current table
5215 5216 5217 5218
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
5219

H
Haojun Liao 已提交
5220
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
5221 5222
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
5223 5224 5225
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
5226

H
Haojun Liao 已提交
5227
  if (pRuntimeEnv->pTsBuf != NULL) {
H
Haojun Liao 已提交
5228 5229
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

5230
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
5231
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
5232
      // failed to find data with the specified tag value and vnodeId
5233
      if (!tsBufIsValidElem(&elem)) {
H
Haojun Liao 已提交
5234 5235 5236 5237 5238 5239
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

5240
        return false;
H
Haojun Liao 已提交
5241
      } else {
H
Haojun Liao 已提交
5242
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
5243 5244 5245 5246 5247 5248 5249 5250

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
5251 5252
      }
    } else {
H
Haojun Liao 已提交
5253
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
5254
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
5255

H
Haojun Liao 已提交
5256
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
5257
        // failed to find data with the specified tag value and vnodeId
5258
        if (!tsBufIsValidElem(&elem1)) {
H
Haojun Liao 已提交
5259 5260 5261 5262 5263
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
5264

H
Haojun Liao 已提交
5265
          return false;
H
Haojun Liao 已提交
5266
        } else {
H
Haojun Liao 已提交
5267
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
5268 5269 5270 5271 5272
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
5273
        }
H
Haojun Liao 已提交
5274

H
Haojun Liao 已提交
5275
      } else {
H
Haojun Liao 已提交
5276 5277
        tsBufSetCursor(pRuntimeEnv->pTsBuf, &pRuntimeEnv->cur);
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
5278 5279 5280 5281 5282
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
5283
      }
5284 5285
    }
  }
5286

5287
  initCtxOutputBuf(pRuntimeEnv);
5288 5289 5290
  return true;
}

H
Haojun Liao 已提交
5291
/*
 * Build a tsdb query condition from the query object: the scan order and the
 * column list are taken from pQuery, the time window is copied from win.
 *
 * @param pQuery  query object providing order, column list and column count
 * @param win     time window to be scanned
 * @return        the query condition, returned by value
 */
STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win) {
  STsdbQueryCond queryCond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
  };

  TIME_WINDOW_COPY(queryCond.twindow, *win);

  return queryCond;
}

/*
 * Build the id info (uid, tid, last handled key) of the table the query is
 * currently working on.
 *
 * The result is created with a designated initializer, so any member that is
 * not listed here is zero-initialized instead of being left indeterminate.
 * This matters because the returned struct is copied as a whole by callers.
 *
 * @param pQuery  query object; pQuery and pQuery->current must not be NULL
 * @return        the table id info, returned by value
 */
static STableIdInfo createTableIdInfo(SQuery* pQuery) {
  assert(pQuery != NULL && pQuery->current != NULL);

  STableId* id = TSDB_TABLEID(pQuery->current->pTable);

  STableIdInfo tidInfo = {
      .uid = id->uid,
      .tid = id->tid,
      .key = pQuery->current->lastKey,
  };

  return tidInfo;
}

/*
 * Record the last handled key of the current table in the table-id hash.
 * The hash is keyed by the table id (tid): a new entry is inserted when the
 * table is not present yet, otherwise only the key of the entry is refreshed.
 */
static void updateTableIdInfo(SQuery* pQuery, SHashObj* pTableIdInfo) {
  STableIdInfo  latest = createTableIdInfo(pQuery);
  STableIdInfo* pExisting = taosHashGet(pTableIdInfo, &latest.tid, sizeof(latest.tid));

  if (pExisting == NULL) {
    taosHashPut(pTableIdInfo, &latest.tid, sizeof(latest.tid), &latest, sizeof(latest));
    return;
  }

  // an entry found under this tid must describe the very same table
  assert(pExisting->tid == latest.tid && pExisting->uid == latest.uid);
  pExisting->key = latest.key;
}
5326 5327 5328 5329 5330 5331 5332
/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
5333
static void sequentialTableProcess(SQInfo *pQInfo) {
5334
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5335
  SQuery *          pQuery = pRuntimeEnv->pQuery;
5336
  setQueryStatus(pQuery, QUERY_COMPLETED);
5337

H
Haojun Liao 已提交
5338
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
5339

5340
  if (isPointInterpoQuery(pQuery)) {
H
Haojun Liao 已提交
5341
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
5342
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
5343

5344
    while (pQInfo->groupIndex < numOfGroups) {
5345
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
H
Haojun Liao 已提交
5346

5347 5348
      qDebug("QInfo:%p point interpolation query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo,
             pQInfo->groupIndex, numOfGroups, group);
H
Haojun Liao 已提交
5349
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5350

H
Haojun Liao 已提交
5351 5352 5353
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
5354

H
Haojun Liao 已提交
5355 5356 5357 5358 5359 5360 5361
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
5362

H
Haojun Liao 已提交
5363
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
5364 5365 5366 5367 5368 5369

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
5370

H
Haojun Liao 已提交
5371
      initCtxOutputBuf(pRuntimeEnv);
5372

5373
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5374
      assert(taosArrayGetSize(s) >= 1);
5375

5376
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
5377
      taosArrayDestroy(s);
H
Haojun Liao 已提交
5378

H
Haojun Liao 已提交
5379
      // here we simply set the first table as current table
5380
      SArray *first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
5381 5382
      pQuery->current = taosArrayGetP(first, 0);

5383
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5384

H
Haojun Liao 已提交
5385 5386 5387 5388 5389
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
5390

H
Haojun Liao 已提交
5391 5392 5393 5394 5395
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5396 5397 5398 5399 5400 5401

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
5402
  } else if (pRuntimeEnv->groupbyNormalCol) {  // group-by on normal columns query
5403
    while (pQInfo->groupIndex < numOfGroups) {
5404
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
5405

5406 5407
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex,
             numOfGroups);
5408

H
Haojun Liao 已提交
5409
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5410

5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5423
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
5424
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
5425

B
Bomin Zhang 已提交
5426 5427
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5428 5429 5430
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5431

5432
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5433 5434
      assert(taosArrayGetSize(s) >= 1);

5435
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5436 5437 5438 5439 5440

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

H
Haojun Liao 已提交
5441
      SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
5442

5443
      // no results generated for current group, continue to try the next group
5444
      taosArrayDestroy(s);
5445 5446 5447 5448 5449
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5450
        pWindowResInfo->pResult[i]->closed = true;  // enable return all results for group by normal columns
5451

H
Haojun Liao 已提交
5452
        SResultRow *pResult = pWindowResInfo->pResult[i];
5453
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5454
          SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, j);
5455
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
5456 5457 5458
        }
      }

5459
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5460
             pQInfo->groupIndex);
5461 5462 5463 5464 5465 5466
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5467
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5468

5469
      pQInfo->groupIndex = currentGroupIndex;  // restore the group index
5470 5471
      assert(pQuery->rec.rows == pWindowResInfo->size);

H
Haojun Liao 已提交
5472
      clearClosedResultRows(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5473
      break;
5474
    }
H
Haojun Liao 已提交
5475
  } else if (pRuntimeEnv->queryWindowIdentical && pRuntimeEnv->pTsBuf == NULL && !isTSCompQuery(pQuery)) {
5476 5477 5478 5479 5480 5481 5482 5483 5484 5485
    //super table projection query with identical query time range for all tables.
    SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    SArray *group = GET_TABLEGROUP(pQInfo, 0);
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));

    void *pQueryHandle = pRuntimeEnv->pQueryHandle;
    if (pQueryHandle == NULL) {
H
Haojun Liao 已提交
5486
      STsdbQueryCond con = createTsdbQueryCond(pQuery, &pQuery->window);
5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &con, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
      pQueryHandle = pRuntimeEnv->pQueryHandle;
    }

    // skip blocks without load the actual data block from file if no filter condition present
    //    skipBlocks(&pQInfo->runtimeEnv);
    //    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    //      setQueryStatus(pQuery, QUERY_COMPLETED);
    //      return;
    //    }

H
Haojun Liao 已提交
5498 5499 5500 5501 5502 5503
    if (pQuery->prjInfo.vgroupLimit != -1) {
      assert(pQuery->limit.limit == -1 && pQuery->limit.offset == 0);
    } else if (pQuery->limit.limit != -1) {
      assert(pQuery->prjInfo.vgroupLimit == -1);
    }

5504
    bool hasMoreBlock = true;
H
Haojun Liao 已提交
5505
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
5506 5507 5508 5509
    SQueryCostInfo *summary = &pRuntimeEnv->summary;
    while ((hasMoreBlock = tsdbNextDataBlock(pQueryHandle)) == true) {
      summary->totalBlocks += 1;

5510
      if (isQueryKilled(pQInfo)) {
5511 5512 5513 5514 5515
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
      }

      tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
      STableQueryInfo **pTableQueryInfo =
H
Haojun Liao 已提交
5516
          (STableQueryInfo **) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
5517 5518 5519 5520 5521
      if (pTableQueryInfo == NULL) {
        break;
      }

      pQuery->current = *pTableQueryInfo;
H
Haojun Liao 已提交
5522
      doTableQueryInfoTimeWindowCheck(pQuery, *pTableQueryInfo);
5523 5524 5525 5526 5527

      if (pRuntimeEnv->hasTagResults) {
        setTagVal(pRuntimeEnv, pQuery->current->pTable, pQInfo->tsdb);
      }

H
Haojun Liao 已提交
5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->current->windowResInfo.size > pQuery->prjInfo.vgroupLimit) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }

      // it is a super table ordered projection query, check for the number of output for each vgroup
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->rec.rows >= pQuery->prjInfo.vgroupLimit) {
        if (QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.skey >= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        } else if (!QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.ekey <= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        }
      }

5547 5548
      uint32_t     status = 0;
      SDataStatis *pStatis = NULL;
5549
      SArray      *pDataBlock = NULL;
5550 5551 5552 5553 5554 5555 5556

      int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                          &pStatis, &pDataBlock, &status);
      if (ret != TSDB_CODE_SUCCESS) {
        break;
      }

H
Haojun Liao 已提交
5557 5558 5559 5560 5561
      if(status == BLK_DATA_DISCARD) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }
5562

H
Haojun Liao 已提交
5563
      ensureOutputBuffer(pRuntimeEnv, &blockInfo);
H
Haojun Liao 已提交
5564 5565
      int64_t prev = getNumOfResult(pRuntimeEnv);

5566 5567 5568 5569 5570 5571 5572 5573 5574 5575
      pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : blockInfo.rows - 1;
      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

      summary->totalRows += blockInfo.rows;
      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes,
             pQuery->current->lastKey);

      pQuery->rec.rows = getNumOfResult(pRuntimeEnv);

H
Haojun Liao 已提交
5576
      int64_t inc = pQuery->rec.rows - prev;
H
Haojun Liao 已提交
5577
      pQuery->current->windowResInfo.size += (int32_t) inc;
H
Haojun Liao 已提交
5578

5579 5580 5581 5582 5583
      // the flag may be set by tableApplyFunctionsOnBlock, clear it here
      CLEAR_QUERY_STATUS(pQuery, QUERY_COMPLETED);

      updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);

H
Haojun Liao 已提交
5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599
      if (pQuery->prjInfo.vgroupLimit >= 0) {
        if (((pQuery->rec.rows + pQuery->rec.total) < pQuery->prjInfo.vgroupLimit) || ((pQuery->rec.rows + pQuery->rec.total) > pQuery->prjInfo.vgroupLimit && prev < pQuery->prjInfo.vgroupLimit)) {
          if (QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts < blockInfo.window.ekey) {
            pQuery->prjInfo.ts = blockInfo.window.ekey;
          } else if (!QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts > blockInfo.window.skey) {
            pQuery->prjInfo.ts = blockInfo.window.skey;
          }
        }
      } else {
        // the limitation of output result is reached, set the query completed
        skipResults(pRuntimeEnv);
        if (limitResults(pRuntimeEnv)) {
          setQueryStatus(pQuery, QUERY_COMPLETED);
          SET_STABLE_QUERY_OVER(pQInfo);
          break;
        }
5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611
      }

      // while the output buffer is full or limit/offset is applied, query may be paused here
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL|QUERY_COMPLETED)) {
        break;
      }
    }

    if (!hasMoreBlock) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      SET_STABLE_QUERY_OVER(pQInfo);
    }
5612 5613
  } else {
    /*
5614 5615 5616
     * the following two cases handled here.
     * 1. ts-comp query, and 2. the super table projection query with different query time range for each table.
     * If the subgroup index is larger than 0, results generated by group by tbname,k is existed.
5617 5618
     * we need to return it to client in the first place.
     */
5619
    if (pQInfo->groupIndex > 0) {
5620
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5621
      pQuery->rec.total += pQuery->rec.rows;
5622

5623
      if (pQuery->rec.rows > 0) {
5624 5625 5626
        return;
      }
    }
5627

5628
    // all data have returned already
5629
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5630 5631
      return;
    }
5632

H
Haojun Liao 已提交
5633
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
H
Haojun Liao 已提交
5634
    resetResultRowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5635

H
Haojun Liao 已提交
5636
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5637 5638
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5639

5640
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
5641
      if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5642
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5643
      }
5644

5645
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5646
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5647
        pQInfo->tableIndex++;
5648 5649
        continue;
      }
5650

H
hjxilinx 已提交
5651
      // TODO handle the limit offset problem
5652
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5653 5654
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5655 5656 5657
          continue;
        }
      }
5658

5659
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5660
      skipResults(pRuntimeEnv);
5661

5662
      // the limitation of output result is reached, set the query completed
5663
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5664
        SET_STABLE_QUERY_OVER(pQInfo);
5665 5666
        break;
      }
5667

5668 5669
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5670

5671
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5672 5673 5674 5675 5676 5677
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5678
        pQInfo->tableIndex++;
5679
        updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5680

5681
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5682
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5683 5684
          break;
        }
5685

H
Haojun Liao 已提交
5686 5687
        if (pRuntimeEnv->pTsBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTsBuf->cur;
H
Haojun Liao 已提交
5688 5689
        }

5690
      } else {
5691
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5692 5693
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5694 5695
          continue;
        } else {
5696 5697 5698
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5699 5700 5701
        }
      }
    }
H
Haojun Liao 已提交
5702

5703
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5704 5705
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5706

5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720
    /*
     * 1. super table projection query, group-by on normal columns query, ts-comp query
     * 2. point interpolation query, last row query
     *
     * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
     * since the finalize stage will be done at the client side.
     *
     * projection query, point interpolation query do not need the finalizer.
     *
     * Only the ts-comp query requires the finalizer function to be executed here.
     */
    if (isTSCompQuery(pQuery)) {
      finalizeQueryResult(pRuntimeEnv);
    }
5721

H
Haojun Liao 已提交
5722 5723
    if (pRuntimeEnv->pTsBuf != NULL) {
      pRuntimeEnv->cur = pRuntimeEnv->pTsBuf->cur;
5724
    }
5725

5726 5727 5728 5729 5730
    qDebug("QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64
           " points returned, total:%" PRId64 ", offset:%" PRId64,
           pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows,
           pQuery->rec.total, pQuery->limit.offset);
  }
5731 5732
}

5733 5734 5735 5736
/*
 * Switch the runtime environment from the master scan to the reverse scan:
 * set the reverse-scan flag, swap the query window keys, invert the scan order
 * (of the query and, if present, of the ts buffer cursor), and create the
 * secondary query handle that is used for the reverse scan.
 *
 * Failure to create the secondary handle aborts through longjmp.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order);
  }

  // the condition is built from the already swapped window and inverted order
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    // Do not keep a dangling pointer around: the handle is only re-assigned after several
    // more calls, and the other cleanup sites in this file reset their handle as well.
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5764 5765 5766 5767
/*
 * Undo the window/cursor changes made for the reverse scan and return to the
 * master scan: swap the query window keys back, invert the ts buffer cursor
 * order again (if a ts buffer exists), switch the function context order and
 * set the master-scan flag.
 *
 * NOTE(review): unlike doSaveContext, pQuery->order.order is not switched
 * back here -- confirm whether this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  SWAP(pQ->window.skey, pQ->window.ekey, TSKEY);

  if (pEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pEnv->pTsBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5778 5779 5780
/*
 * Close all result rows once a scan is finished.
 *
 * For an interval query the result rows are kept per table, so the rows of
 * every table in every table group are closed. Otherwise only the result rows
 * kept in the runtime environment (group results) are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllResultRows(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(tables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(tables, t);
      closeAllResultRows(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables: run the forward (master) scan, run
 * the reverse scan when needReverseScan() asks for it, then merge/copy the
 * results into the output buffer.
 *
 * When pQInfo->groupIndex is positive the scan is not repeated; only the
 * next batch of results is copied into the output buffer.
 *
 * The function leaves through longjmp(pRuntimeEnv->env, ...) when the query
 * carries an error code or has been killed.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery           *query = env->pQuery;

  /*
   * groupIndex > 0: the query itself has already been processed, only the
   * data needs to be copied into the output buffer.
   */
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(query)) {
      copyFromWindowResToSData(pQInfo, &env->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, query);
#ifdef _DEBUG_VIEW
      displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         query->window.skey, query->window.ekey, query->order.order);

  // forward scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // an error occurred or the query was killed: abort the current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(env->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results produced by the forward scan
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(query)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(query, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    // TODO finalizeQueryResult may cause SIGSEGV since the memory may not be allocated yet; add a cleanup function
    longjmp(env->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (!QUERY_IS_INTERVAL_QUERY(query) && !isSumAvgRateQuery(query)) {
    // not an interval query
    copyFromWindowResToSData(pQInfo, &env->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, query);

#ifdef _DEBUG_VIEW
    displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, query->rec.rows, query->rec.total + query->rec.rows);
}

H
Haojun Liao 已提交
5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887

static char *getArithemicInputSrc(void *param, const char *name, int32_t colId) {
  SArithmeticSupport *pSupport = (SArithmeticSupport *) param;
  SExprInfo* pExprInfo = (SExprInfo*) pSupport->exprList;

  int32_t index = -1;
  for (int32_t i = 0; i < pSupport->numOfCols; ++i) {
    if (colId == pExprInfo[i].base.resColId) {
      index = i;
      break;
    }
  }

  assert(index >= 0 && index < pSupport->numOfCols);
  return pSupport->data[index] + pSupport->offset * pExprInfo[index].bytes;
}

/*
 * Apply the second stage expressions (pQuery->pExpr2) to the current result
 * rows held in pQuery->sdata.
 *
 * For every second stage expression:
 *   - TSDB_FUNC_ARITHM expressions are evaluated with tExprTreeCalcTraverse(),
 *     reading their inputs from the first stage output columns;
 *   - any other expression is a plain copy of the first stage column with the
 *     same function id and column id.
 * The results are staged in temporary buffers and then written back to
 * pQuery->sdata[0 .. numOfExpr2-1].
 *
 * Nothing is done when there is no second stage expression. If a temporary
 * buffer cannot be allocated, an error is logged and pQuery->sdata is left
 * untouched (the function has no way to report the failure to its caller).
 */
static void doSecondaryArithmeticProcess(SQuery* pQuery) {
  if (pQuery->numOfExpr2 == 0) {
    return;
  }

  bool               succeeded = false;
  SArithmeticSupport arithSup = {0};

  tFilePage **data = calloc(pQuery->numOfExpr2, POINTER_BYTES);
  if (data == NULL) {
    goto _cleanup;
  }

  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    int32_t bytes = pQuery->pExpr2[i].bytes;

    // zero-initialized: a column that finds no source below must not expose uninitialized heap content
    data[i] = (tFilePage *)calloc(1, (size_t)(bytes * pQuery->rec.rows) + sizeof(tFilePage));
    if (data[i] == NULL) {
      goto _cleanup;
    }
  }

  arithSup.offset = 0;
  arithSup.numOfCols = (int32_t)pQuery->numOfOutput;
  arithSup.exprList  = pQuery->pExpr1;
  arithSup.data      = calloc(arithSup.numOfCols, POINTER_BYTES);
  if (arithSup.data == NULL) {
    goto _cleanup;
  }

  // the first stage output columns are the input of the arithmetic calculation
  for (int32_t k = 0; k < arithSup.numOfCols; ++k) {
    arithSup.data[k] = pQuery->sdata[k]->data;
  }

  for (int i = 0; i < pQuery->numOfExpr2; ++i) {
    SExprInfo *pExpr = &pQuery->pExpr2[i];

    SSqlFuncMsg* pSqlFunc = &pExpr->base;
    if (pSqlFunc->functionId != TSDB_FUNC_ARITHM) {
      // plain column: copy the matching first stage output
      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pSqlFunc->functionId == pQuery->pExpr1[j].base.functionId &&
            pSqlFunc->colInfo.colId == pQuery->pExpr1[j].base.colInfo.colId) {
          memcpy(data[i]->data, pQuery->sdata[j]->data, (size_t)(pQuery->pExpr1[j].bytes * pQuery->rec.rows));
          break;
        }
      }
    } else {
      // calculate the result from several other columns
      arithSup.pArithExpr = pExpr;
      tExprTreeCalcTraverse(arithSup.pArithExpr->pExpr, (int32_t)pQuery->rec.rows, data[i]->data, &arithSup, TSDB_ORDER_ASC,
                            getArithemicInputSrc);
    }
  }

  // all columns are calculated, now overwrite the output buffer
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    memcpy(pQuery->sdata[i]->data, data[i]->data, (size_t)(pQuery->pExpr2[i].bytes * pQuery->rec.rows));
  }

  succeeded = true;

_cleanup:
  if (data != NULL) {
    for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
      tfree(data[i]);
    }
  }

  tfree(data);
  tfree(arithSup.data);

  if (!succeeded) {
    qError("failed to allocate buffers for %d second stage expression(s), result is not updated",
           (int32_t)pQuery->numOfExpr2);
  }
}

5941 5942 5943 5944 5945 5946
/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5947
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5948
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5949

H
hjxilinx 已提交
5950
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5951
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5952 5953
    return;
  }
5954

H
hjxilinx 已提交
5955
  pQuery->current = pTableInfo;  // set current query table info
5956

5957
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5958
  finalizeQueryResult(pRuntimeEnv);
5959

H
Haojun Liao 已提交
5960 5961 5962 5963
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
  doSecondaryArithmeticProcess(pQuery);

5964
  if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5965
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5966
  }
5967

5968
  skipResults(pRuntimeEnv);
5969
  limitResults(pRuntimeEnv);
5970 5971
}

H
hjxilinx 已提交
5972
/*
 * Handle a query on one table that may produce many output rows. The table is
 * scanned repeatedly until at least one row is produced or the query status
 * is QUERY_COMPLETED; limit/offset are applied to the produced rows.
 *
 * When the query completes, the table id info of the current table is stored
 * in pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery           *query = env->pQuery;

  query->current = pTableInfo;

  // for ts_comp query, re-initialization is not allowed
  if (!isTSCompQuery(query)) {
    resetDefaultResInfoOutputBuf(env);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(env);
  if (query->limit.offset > 0 && query->numOfFilterCols == 0) {
    setQueryStatus(query, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(env, query->current->lastKey);
    finalizeQueryResult(env);

    query->rec.rows = getNumOfResult(env);

    bool needSkip = (query->limit.offset > 0) && (query->numOfFilterCols > 0) && (query->rec.rows > 0);
    if (needSkip) {
      skipResults(env);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (query->rec.rows > 0 || Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, query->limit.offset, query->current->lastKey, query->current->win.ekey);

    // nothing to return yet: clear the output buffer and scan again
    resetDefaultResInfoOutputBuf(env);
  }

  limitResults(env);

  if (Q_STATUS_EQUAL(query->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        query->current->lastKey, query->window.ekey);
  } else if (Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
    STableIdInfo idInfo = createTableIdInfo(query);
    taosHashPut(pQInfo->arrTableIdInfo, &idInfo.tid, sizeof(idInfo.tid), &idInfo, sizeof(STableIdInfo));
  }

  if (!isTSCompQuery(query)) {
    assert(query->rec.rows <= query->rec.capacity);
  }
}

H
Haojun Liao 已提交
6027
/*
 * Scan the data blocks of the current table starting from 'start' and keep
 * scanning until the query status is QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * After each scan, when filter columns or a ts buffer are present, no fill is
 * requested and an offset remains, closed result rows are dropped from the
 * front of the window result list to consume the offset.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *query = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(query->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool filtered = (query->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL);
    if (filtered && query->limit.offset > 0 && query->fillType == TSDB_FILL_NONE) {
      // maxOutput <= 0, means current query does not generate any results
      int32_t closedRows = numOfClosedResultRows(&pRuntimeEnv->windowResInfo);

      // drop as many closed rows as the remaining offset allows
      int32_t dropped = (int32_t)(MIN(closedRows, query->limit.offset));
      popFrontResultRow(pRuntimeEnv, &pRuntimeEnv->windowResInfo, dropped);
      query->limit.offset -= dropped;
    }
  } while (!Q_STATUS_EQUAL(query->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

6054
// handle time interval query on table
H
hjxilinx 已提交
6055
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
6056 6057
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
6058 6059
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
6060

H
Haojun Liao 已提交
6061
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
6062
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
6063

6064
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
6065 6066 6067 6068 6069 6070
  if (!pRuntimeEnv->groupbyNormalCol) {
    skipTimeInterval(pRuntimeEnv, &newStartKey);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      return;
    }
6071 6072
  }

6073
  while (1) {
H
Haojun Liao 已提交
6074
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
6075

H
Haojun Liao 已提交
6076
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
6077
      pQInfo->groupIndex = 0;  // always start from 0
6078
      pQuery->rec.rows = 0;
6079
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
6080

H
Haojun Liao 已提交
6081
      popFrontResultRow(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQInfo->groupIndex);
6082
    }
6083

H
Haojun Liao 已提交
6084
    // no result generated, abort
H
Haojun Liao 已提交
6085
    if (pQuery->rec.rows == 0 || pRuntimeEnv->groupbyNormalCol) {
H
Haojun Liao 已提交
6086 6087 6088 6089 6090
      break;
    }

    doSecondaryArithmeticProcess(pQuery);
    
6091
    // the offset is handled at prepare stage if no interpolation involved
H
Haojun Liao 已提交
6092
    if (pQuery->fillType == TSDB_FILL_NONE) {
6093
      limitResults(pRuntimeEnv);
6094 6095
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
6096
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
H
Haojun Liao 已提交
6097
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (const tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
6098
      numOfFilled = 0;
6099

H
Haojun Liao 已提交
6100
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
6101
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
6102
        limitResults(pRuntimeEnv);
6103 6104
        break;
      }
6105

6106
      // no result generated yet, continue retrieve data
6107
      pQuery->rec.rows = 0;
6108 6109
    }
  }
6110

6111
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
6112
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
H
Haojun Liao 已提交
6113
    // maxOutput <= 0, means current query does not generate any results
H
Haojun Liao 已提交
6114
    int32_t numOfClosed = numOfClosedResultRows(&pRuntimeEnv->windowResInfo);
6115

H
Haojun Liao 已提交
6116 6117
    if ((pQuery->limit.offset > 0 && pQuery->limit.offset < numOfClosed) || pQuery->limit.offset == 0) {
      // skip offset result rows
H
Haojun Liao 已提交
6118
      popFrontResultRow(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (int32_t) pQuery->limit.offset);
H
Haojun Liao 已提交
6119 6120 6121 6122

      pQuery->rec.rows   = 0;
      pQInfo->groupIndex = 0;
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
H
Haojun Liao 已提交
6123
      popFrontResultRow(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQInfo->groupIndex);
H
Haojun Liao 已提交
6124 6125 6126 6127

      doSecondaryArithmeticProcess(pQuery);
      limitResults(pRuntimeEnv);
    }
6128 6129 6130
  }
}

6131 6132 6133 6134
/*
 * Entry point for a query on a single table.
 *
 * If results of a previous round remain, they are returned first (either by
 * continuing the fill procedure or by copying the remaining window results).
 * Otherwise the table is processed by the interval, fixed-output or
 * multi-output processor, and the elapsed time is added to the summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery           *query = env->pQuery;

  if (queryHasRemainResForTableQuery(env)) {
    if (query->fillType != TSDB_FILL_NONE) {
      /*
       * There are remaining results that have not been returned due to result interpolation,
       * so we stay in this procedure instead of launching the retrieve procedure for next results.
       */
      int32_t filled = 0;
      query->rec.rows = doFillGapsInResults(env, (tFilePage **)query->sdata, &filled);

      if (query->rec.rows > 0) {
        limitResults(env);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, query->rec.rows, query->rec.total);
      return;
    }

    query->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (env->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &env->windowResInfo);
      popFrontResultRow(env, &env->windowResInfo, pQInfo->groupIndex);

      if (query->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);

        // no data remains
        if (env->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, query->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  query->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *group = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *tableInfo = taosArrayGetP(group, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(query) || env->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, tableInfo);
  } else if (isFixedOutputQuery(env)) {
    tableFixedOutputProcess(pQInfo, tableInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(query->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, tableInfo);
  }

  // record the total elapsed time
  env->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

6196
/*
 * Entry point for a query on a super table. Interval queries and fixed-output
 * queries (that are neither point interpolation nor group-by-normal-column
 * queries) are processed over all tables at once; everything else is
 * processed table by table. The elapsed time is added to the summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery           *query = env->pQuery;

  query->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool scanAllTablesAtOnce =
      QUERY_IS_INTERVAL_QUERY(query) ||
      (isFixedOutputQuery(env) && (!isPointInterpoQuery(query)) && (!env->groupbyNormalCol));

  if (!scanAllTablesAtOnce) {
    assert((query->checkBuffer == 1 && query->interval.interval == 0) || isPointInterpoQuery(query) ||
            isFirstLastRowQuery(query) || env->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // record the total elapsed time
  env->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

6217
/*
 * Find the position of the column referenced by an expression.
 *
 *   - tag column:            index in pTagCols, or TSDB_TBNAME_COLUMN_INDEX
 *                            when the column id is the table name pseudo column
 *   - user specified column: TSDB_UD_COLUMN_INDEX
 *   - normal column:         index in pQueryMsg->colList
 *
 * The column is expected to exist; if it does not, an assert fires and -1 is
 * returned.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t k = 0; k < pQueryMsg->numOfTags; ++k) {
      if (pTagCols[k].colId == pExprMsg->colInfo.colId) {
        return k;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t k = 0; k < pQueryMsg->numOfCols; ++k) {
      if (pQueryMsg->colList[k].colId == pExprMsg->colInfo.colId) {
        return k;
      }
    }
  }

  // not found in any source
  assert(0);
  return -1;
}

6248 6249 6250
/*
 * Check that the column referenced by an expression resolves to an index that
 * is below the number of columns or below the number of tags of the message.
 *
 * NOTE(review): negative sentinel results of getColumnIndexInSource() pass
 * this check whenever numOfCols or numOfTags is non-negative - confirm that
 * this is intended.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  const int32_t idx = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (idx < pQueryMsg->numOfCols) {
    return true;
  }

  return idx < pQueryMsg->numOfTags;
}

6253
/*
 * Sanity check of the scalar fields of a query message: interval, number of
 * tables, number of group-by columns and number of output columns. The first
 * violated constraint is logged and false is returned.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the number of source columns/tags of a query message.
 *
 * Returns false when either counter is negative or when the counters, alone
 * or together, exceed TSDB_MAX_COLUMNS. When the message carries neither
 * columns nor tags, only the following expressions are accepted:
 *   - TSDB_FUNC_TAGPRJ
 *   - TSDB_FUNC_TID_TAG on the table name pseudo column
 *   - TSDB_FUNC_COUNT   on the table name pseudo column
 *
 * Both counters come from the (untrusted) message, so each one is bounded
 * before they are added; summing them first could overflow the signed
 * addition and let an oversized counter slip through the limit check.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfCols = pQueryMsg->numOfCols;
  int32_t numOfTags = pQueryMsg->numOfTags;

  if (numOfCols < 0 || numOfTags < 0 || numOfCols > TSDB_MAX_COLUMNS || numOfTags > TSDB_MAX_COLUMNS ||
      numOfCols + numOfTags > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  // at least one source column/tag is available, nothing more to check
  if (numOfCols + numOfTags > 0) {
    return true;
  }

  // no source column at all: every expression must be able to work without one
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
      continue;
    }

    return false;
  }

  return true;
}

6299
/*
 * Decode the table id entries stored at pMsg. Each entry is byte-swapped in
 * place (tid, uid, key) and appended to a newly created array that is
 * returned through pTableIdList.
 *
 * Returns the position in the message right after the last decoded entry.
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *entry = (STableIdInfo *)pMsg;
  for (int32_t remaining = pQueryMsg->numOfTables; remaining > 0; --remaining, ++entry) {
    // convert the fields of the entry inside the message buffer
    entry->tid = htonl(entry->tid);
    entry->uid = htobe64(entry->uid);
    entry->key = htobe64(entry->key);

    taosArrayPush(*pTableIdList, entry);
  }

  return (char *)entry;
}
6317

6318
/**
H
hjxilinx 已提交
6319
 * pQueryMsg->head has been converted before this function is called.
6320
 *
H
hjxilinx 已提交
6321
 * @param pQueryMsg
6322 6323 6324 6325
 * @param pTableIdList
 * @param pExpr
 * @return
 */
H
Haojun Liao 已提交
6326
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr, SSqlFuncMsg ***pSecStageExpr,
weixin_48148422's avatar
weixin_48148422 已提交
6327
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
6328 6329
  int32_t code = TSDB_CODE_SUCCESS;

6330 6331 6332 6333
  if (taosCheckVersion(pQueryMsg->version, version, 3) != 0) {
    return TSDB_CODE_QRY_INVALID_MSG;
  }

6334 6335 6336 6337
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
6338 6339 6340
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
6341 6342
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
Haojun Liao 已提交
6343
  pQueryMsg->vgroupLimit = htobe64(pQueryMsg->vgroupLimit);
H
hjxilinx 已提交
6344

6345 6346
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
6347
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
6348
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
6349 6350

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
6351
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
6352
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
6353 6354 6355
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
6356
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
6357
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
6358
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6359
  pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput);
6360

6361
  // query msg safety check
6362
  if (!validateQueryMsg(pQueryMsg)) {
6363 6364
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
6365 6366
  }

H
hjxilinx 已提交
6367 6368
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
6369 6370
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
6371
    pColInfo->colId = htons(pColInfo->colId);
6372
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
6373 6374
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
6375

H
hjxilinx 已提交
6376
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
6377

H
hjxilinx 已提交
6378
    int32_t numOfFilters = pColInfo->numOfFilters;
6379
    if (numOfFilters > 0) {
H
hjxilinx 已提交
6380
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
6381 6382 6383 6384
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
6385 6386 6387
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
6388
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
6389

6390 6391
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
6392 6393 6394

      pMsg += sizeof(SColumnFilterInfo);

6395 6396
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
6397

6398
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
6399 6400 6401 6402 6403
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

6404
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
6405
        pMsg += (pColFilter->len + 1);
6406
      } else {
6407 6408
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
6409 6410
      }

6411 6412
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
6413 6414 6415
    }
  }

6416
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
6417 6418 6419 6420 6421
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

6422
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
6423

6424
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6425
    (*pExpr)[i] = pExprMsg;
6426

6427
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
6428
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
H
Haojun Liao 已提交
6429 6430 6431 6432
    pExprMsg->colInfo.flag  = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId    = htons(pExprMsg->functionId);
    pExprMsg->numOfParams   = htons(pExprMsg->numOfParams);
    pExprMsg->resColId      = htons(pExprMsg->resColId);
6433

6434
    pMsg += sizeof(SSqlFuncMsg);
6435 6436

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
6437
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
6438 6439 6440 6441
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
6442
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
6443 6444 6445 6446 6447
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
6448 6449
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
6450
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
6451 6452
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
6453 6454
      }
    } else {
6455
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
6456
//        return TSDB_CODE_QRY_INVALID_MSG;
6457
//      }
6458 6459
    }

6460
    pExprMsg = (SSqlFuncMsg *)pMsg;
6461
  }
6462

6463
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
6464
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
6465
    goto _cleanup;
6466
  }
6467

H
Haojun Liao 已提交
6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510
  if (pQueryMsg->secondStageOutput) {
    pExprMsg = (SSqlFuncMsg *)pMsg;
    *pSecStageExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES);
    
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      (*pSecStageExpr)[i] = pExprMsg;

      pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
      pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
      pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
      pExprMsg->functionId = htons(pExprMsg->functionId);
      pExprMsg->numOfParams = htons(pExprMsg->numOfParams);

      pMsg += sizeof(SSqlFuncMsg);

      for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
        pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
        pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

        if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
          pExprMsg->arg[j].argValue.pz = pMsg;
          pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
        } else {
          pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
        }
      }

      int16_t functionId = pExprMsg->functionId;
      if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
        if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
          code = TSDB_CODE_QRY_INVALID_MSG;
          goto _cleanup;
        }
      } else {
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_QRY_INVALID_MSG;
//      }
      }

      pExprMsg = (SSqlFuncMsg *)pMsg;
    }
  }

H
hjxilinx 已提交
6511
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
6512

H
hjxilinx 已提交
6513
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
6514
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
6515 6516 6517 6518
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
6519 6520 6521

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
6522
      pMsg += sizeof((*groupbyCols)[i].colId);
6523 6524

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
6525 6526
      pMsg += sizeof((*groupbyCols)[i].colIndex);

6527
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
6528 6529 6530 6531 6532
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
6533

H
hjxilinx 已提交
6534 6535
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
6536 6537
  }

6538 6539
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
6540
    pQueryMsg->fillVal = (uint64_t)(pMsg);
6541 6542

    int64_t *v = (int64_t *)pMsg;
6543
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6544 6545
      v[i] = htobe64(v[i]);
    }
6546

6547
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
6548
  }
6549

6550 6551
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6552 6553 6554 6555 6556
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

6557 6558
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
6559

6560 6561 6562 6563
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
6564

6565
      (*tagCols)[i] = *pTagCol;
6566
      pMsg += sizeof(SColumnInfo);
6567
    }
H
hjxilinx 已提交
6568
  }
6569

6570 6571 6572
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
6573 6574 6575 6576 6577 6578

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
6579 6580 6581
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
6582

weixin_48148422's avatar
weixin_48148422 已提交
6583
  if (*pMsg != 0) {
6584
    size_t len = strlen(pMsg) + 1;
6585

6586
    *tbnameCond = malloc(len);
6587 6588 6589 6590 6591
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
6592
    strcpy(*tbnameCond, pMsg);
6593
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
6594
  }
6595

6596
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
6597 6598
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
6599
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
6600
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
6601 6602

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
6603 6604

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
6605
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
6606 6607
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
6608 6609 6610 6611
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
6612 6613

  return code;
6614 6615
}

H
Haojun Liao 已提交
6616 6617
/*
 * Deserialize the binary expression carried in the first argument of an
 * arithmetic function descriptor and attach the resulting tree to pArithExprInfo.
 *
 * Returns TSDB_CODE_SUCCESS when a tree was built, the code captured by the
 * CATCH clause when exprTreeFromBinary raised, or TSDB_CODE_QRY_APP_ERROR when
 * no tree was produced.
 */
static int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary", pQueryMsg);

  tExprNode* pRoot = NULL;

  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    // hand the tree over to the expression descriptor
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
6637
static int32_t createQueryFuncExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
6638 6639
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
6640
  int32_t code = TSDB_CODE_SUCCESS;
6641

H
Haojun Liao 已提交
6642
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
6643
  if (pExprs == NULL) {
6644
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
6645 6646 6647 6648 6649
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

H
Haojun Liao 已提交
6650
  for (int32_t i = 0; i < numOfOutput; ++i) {
6651
    pExprs[i].base = *pExprMsg[i];
6652
    pExprs[i].bytes = 0;
6653 6654 6655 6656

    int16_t type = 0;
    int16_t bytes = 0;

6657
    // parse the arithmetic expression
6658
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
6659
      code = buildArithmeticExprFromMsg(&pExprs[i], pQueryMsg);
6660

6661
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6662
        tfree(pExprs);
6663
        return code;
6664 6665
      }

6666
      type  = TSDB_DATA_TYPE_DOUBLE;
6667
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
6668
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
6669
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6670
      type = s.type;
H
Haojun Liao 已提交
6671
      bytes = s.bytes;
6672 6673
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
6674 6675
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6676 6677
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6678 6679 6680 6681 6682

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6683
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
6684
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6685

dengyihao's avatar
dengyihao 已提交
6686
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6687 6688 6689 6690
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6691
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6692

H
Haojun Liao 已提交
6693 6694 6695
        type  = s.type;
        bytes = s.bytes;
      }
6696 6697
    }

S
TD-1057  
Shengliang Guan 已提交
6698
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6699
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6700
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6701
      tfree(pExprs);
6702
      return TSDB_CODE_QRY_INVALID_MSG;
6703 6704
    }

6705
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6706
      tagLen += pExprs[i].bytes;
6707
    }
6708
    assert(isValidDataType(pExprs[i].type));
6709 6710 6711
  }

  // TODO refactor
H
Haojun Liao 已提交
6712
  for (int32_t i = 0; i < numOfOutput; ++i) {
6713 6714
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6715

6716
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6717
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6718 6719 6720 6721 6722 6723 6724 6725 6726
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6727 6728 6729
    }
  }

6730
  *pExprInfo = pExprs;
6731 6732 6733
  return TSDB_CODE_SUCCESS;
}

6734
/*
 * Create the group-by descriptor for a query message.
 *
 * Returns NULL without touching *code when the message has no group-by columns.
 * Returns NULL and sets *code to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails.
 * Otherwise returns a newly allocated SSqlGroupbyExpr whose columnInfo array holds
 * copies of the first numOfGroupCols entries of pColIndex.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure instead
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6758
/*
 * Build pQuery->pFilterInfo from the per-column filter descriptions in pQuery->colList.
 *
 * Columns with at least one filter are counted into pQuery->numOfFilterCols, one
 * SSingleColumnFilterInfo is created per such column, and every filter element gets
 * its callback (fp) selected from the range or value filter tables of the column type.
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure, or
 * TSDB_CODE_QRY_INVALID_MSG when a filter description cannot be mapped to a callback.
 * Partially built state is left in pQuery for the caller to release.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      // a single struct assignment is sufficient to copy the column description
      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // two-sided range: the table must exist even when asserts are compiled out
          if (rangeFilterArray == NULL) {
            qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }

          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          if (filterArray == NULL) {
            qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }

          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }

        // a missing callback would be called through a NULL pointer later on
        if (pSingleColFilter->fp == NULL) {
          qError("QInfo:%p no filter function for data type:%d, lower:%d, upper:%d", pQInfo, type, lower, upper);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

6851
/*
 * Rewrite colInfo.colIndex of every first-stage expression so that it refers to the
 * position of the column inside pQuery->colList (normal columns) or
 * pQuery->tagColList (tag columns). Arithmetic expressions and user-defined constant
 * columns are left untouched.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pExpr1 != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pExpr1[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is not part of the tag column list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6888 6889
// forward declaration: createQInfoImpl and initQInfo call freeQInfo before its definition below
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6890 6891 6892
/*
 * Choose the number of rows of the output buffer (rec.capacity) and the fill level
 * (rec.threshold, 85% of the capacity) for the query.
 *
 * Projection queries get enough rows to fill RESULT_MSG_MIN_SIZE bytes, but never
 * fewer than RESULT_MSG_MIN_ROWS; all other queries use a fixed 4096 rows.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  // in case of non-prj query, a smaller output buffer will be used.
  int32_t rows = 4096;

  if (isProjQuery(pQuery)) {
    rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    rows = (rows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : rows;
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

6909
/*
 * Allocate and populate a SQInfo (and its SQuery) for the given query message.
 *
 * pGroupbyExpr, pExprs, pSecExprs and pTagCols are stored in the new SQuery. If the
 * SQInfo or SQuery allocation itself fails, pGroupbyExpr, pTagCols and pExprs are
 * released here; on later failures everything attached to the SQInfo is released
 * through freeQInfo().
 * NOTE(review): pSecExprs is not released on the two earliest failure paths - confirm
 * whether the caller keeps ownership in that case.
 *
 * Returns the new SQInfo, or NULL on any failure.
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pExpr1          = pExprs;
  pQuery->pExpr2          = pSecExprs;
  pQuery->numOfExpr2      = pQueryMsg->secondStageOutput;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;
  pQuery->prjInfo.vgroupLimit = pQueryMsg->vgroupLimit;
  pQuery->prjInfo.ts      = (pQueryMsg->order == TSDB_ORDER_ASC)? INT64_MIN:INT64_MAX;

  // colList is an array of SColumnInfo, so size the allocation by that element type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  int32_t srcSize = 0;
  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
    srcSize += pQuery->colList[i].bytes;
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    // TODO refactor
    int16_t bytes = 0;
    // valid indexes of pExpr2 are [0, numOfExpr2), so col == numOfExpr2 must not read it
    if (pQuery->pExpr2 == NULL || col >= pQuery->numOfExpr2) {
      bytes = pExprs[col].bytes;
    } else {
      bytes = MAX(pQuery->pExpr2[col].bytes, pExprs[col].bytes);
    }

    size_t size = (size_t)((pQuery->rec.capacity + 1) * bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  pQInfo->runtimeEnv.pool = initResultRowPool(getResultRowSize(&pQInfo->runtimeEnv));
  pQInfo->runtimeEnv.prevRow = malloc(POINTER_BYTES * pQuery->numOfCols + srcSize);

  // prevRow may legitimately be NULL only for a zero-sized request (no columns)
  if (pQInfo->runtimeEnv.keyBuf == NULL || (pQInfo->runtimeEnv.prevRow == NULL && pQuery->numOfCols > 0)) {
    goto _cleanup;
  }

  // prevRow layout: numOfCols pointers followed by the column value storage they point into
  if (pQuery->numOfCols > 0) {
    char* start = POINTER_BYTES * pQuery->numOfCols + (char*) pQInfo->runtimeEnv.prevRow;
    pQInfo->runtimeEnv.prevRow[0] = start;

    for(int32_t i = 1; i < pQuery->numOfCols; ++i) {
      pQInfo->runtimeEnv.prevRow[i] = pQInfo->runtimeEnv.prevRow[i - 1] + pQuery->colList[i-1].bytes;
    }
  }

  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  pQInfo->runtimeEnv.queryWindowIdentical = true;
  STimeWindow window = pQuery->window;

  // index of the next free STableQueryInfo slot inside pQInfo->pBuf
  int32_t index = 0;
  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      window.skey = info->lastKey;
      if (info->lastKey != pQuery->window.skey) {
        pQInfo->runtimeEnv.queryWindowIdentical = false;
      }

      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // the SQInfo was never created, so the table group is still owned by the argument
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // nothing has been attached to a SQuery yet: release the handed-over inputs directly
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
7115
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
7116 7117 7118 7119
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
7120

H
hjxilinx 已提交
7121 7122 7123 7124
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
7125
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
7126 7127 7128
  return (sig == (uint64_t)pQInfo);
}

7129
/*
 * Finish the initialization of a SQInfo created by createQInfoImpl.
 *
 * Sets the timestamp precision from the tsdb configuration, short-circuits queries
 * that cannot produce results (empty time range for the scan order, or no table
 * left), builds the timestamp buffer from the comp blocks embedded in the message
 * when tsLen > 0, and runs doInitQInfo().
 *
 * Returns TSDB_CODE_SUCCESS, or the doInitQInfo() error code, in which case pQInfo
 * has already been released through freeQInfo().
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The timestamp buffer is only handed on through doInitQInfo(), so it is created after
  // the early returns above; creating it earlier would drop it unreferenced on those paths.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);

    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
7174
/*
 * Release an array of column filters: for every entry flagged as a string filter
 * (filterstr) the buffer referenced by pz is freed, then the array itself.
 * Nothing is done when pFilter is NULL or numOfFilters is 0.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    for (int32_t idx = 0; idx < numOfFilters; ++idx) {
      SColumnFilterInfo *pEntry = &pFilter[idx];
      if (pEntry->filterstr) {
        free((void*)(pEntry->pz));
      }
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
7188 7189
/*
 * Destroy every STableQueryInfo referenced from the group list, the per-group arrays,
 * the group list itself and the lookup map, then reset the descriptor to empty
 * (NULL list, NULL map, zero tables).
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  if (pTableqinfoGroupInfo->pGroupList != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pTableqinfoGroupInfo->pGroupList);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pTables = taosArrayGetP(pTableqinfoGroupInfo->pGroupList, g);
      size_t  tableCount = taosArrayGetSize(pTables);

      for(int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pTables, t);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pTables);
    }
  }

  taosArrayDestroy(pTableqinfoGroupInfo->pGroupList);
  pTableqinfoGroupInfo->pGroupList = NULL;

  taosHashCleanup(pTableqinfoGroupInfo->map);
  pTableqinfoGroupInfo->map = NULL;

  pTableqinfoGroupInfo->numOfTables = 0;
}

H
Haojun Liao 已提交
7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227
/*
 * Destroy the expression tree of each of the numOfExpr entries and release the array.
 * Always returns NULL so that the caller can reset its pointer in the same statement.
 * A NULL array is only expected together with numOfExpr == 0.
 */
static void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr) {
  if (pExprInfo != NULL) {
    for (int32_t idx = 0; idx < numOfExpr; ++idx) {
      SExprInfo *pEntry = &pExprInfo[idx];
      if (pEntry->pExpr != NULL) {
        tExprNodeDestroy(pEntry->pExpr, NULL);
      }
    }

    tfree(pExprInfo);
  } else {
    assert(numOfExpr == 0);
  }

  return NULL;
}

H
hjxilinx 已提交
7228 7229 7230 7231
/*
 * Tear down a query handle and everything reachable from it.
 *
 * Does nothing when pQInfo fails isValidQInfo(). Otherwise the buffer quota
 * accounted for the query's tables is given back, the runtime environment is
 * torn down, the SQuery object and its members are released, and finally the
 * handle itself is freed. The signature is cleared just before the final free.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    // one result buffer per output column
    if (pQuery->sdata != NULL) {
      int32_t outIdx = 0;
      while (outIdx < pQuery->numOfOutput) {
        tfree(pQuery->sdata[outIdx]);
        outIdx += 1;
      }

      tfree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      tfree(pQuery->fillVal);
    }

    // the filter array itself is released further below, after the expressions
    for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
      SSingleColumnFilterInfo *pSingle = pQuery->pFilterInfo + f;
      if (pSingle->numOfFilters > 0) {
        tfree(pSingle->pFilters);
      }
    }

    pQuery->pExpr1 = destroyQueryFuncExpr(pQuery->pExpr1, pQuery->numOfOutput);
    pQuery->pExpr2 = destroyQueryFuncExpr(pQuery->pExpr2, pQuery->numOfExpr2);

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t c = 0; c < pQuery->numOfCols; c++) {
        SColumnInfo *pCol = &pQuery->colList[c];
        freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
      }

      tfree(pQuery->colList);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosHashCleanup(pQInfo->arrTableIdInfo);

  // clear the signature before the memory is handed back
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
7294
/*
 * Compute the number of payload bytes that will be sent back for the current
 * result set.
 *
 * For a ts-comp query with at least one row, the result is the size of the
 * file whose path is stored in sdata[0]->data; *numOfRows is overwritten with
 * that size as well. If the file cannot be stat'ed, an error is logged and 0
 * is returned.
 * TODO handle the case that the file is too large to send back one time
 *
 * For every other query the size is rowSize * (*numOfRows).
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
7315

H
hjxilinx 已提交
7316 7317 7318
/*
 * Copy the current result set into the response buffer and update the
 * running totals of the query.
 *
 * pQInfo  query handle
 * data    destination buffer of the response message
 *
 * For a ts-comp query the content of the temporary file named by
 * sdata[0]->data is read into data, and the file is closed and unlinked
 * afterwards. For any other query the rows are copied by
 * doCopyQueryResultToMsg().
 *
 * Seek/read failures on the temporary file are logged; the function still
 * returns TSDB_CODE_SUCCESS in every case, as before.
 * NOTE(review): callers seen in this file ignore the return value - confirm
 * before propagating an error code from here.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    const char *path = pQuery->sdata[0]->data;
    int32_t     fd = open(path, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the size signed so that a failing lseek (-1) is not mistaken for a huge file
      int64_t fileSize = (int64_t) lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, path, fileSize);

      if (fileSize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to seek ts-comp tmp file, path:%s, reason:%s", pQInfo, path, strerror(errno));
      } else {
        // read() may legitimately return fewer bytes than requested, so loop until done
        int64_t total = 0;
        while (total < fileSize) {
          int64_t n = (int64_t) read(fd, data + total, (size_t)(fileSize - total));
          if (n > 0) {
            total += n;
          } else if (n < 0 && errno == EINTR) {
            continue;  // interrupted before any data was transferred, retry
          } else {
            qError("QInfo:%p failed to read ts-comp tmp file, path:%s, read:%"PRId64", expected:%"PRId64", reason:%s",
                   pQInfo, path, total, fileSize, (n < 0) ? strerror(errno) : "unexpected end of file");
            break;
          }
        }
      }

      close(fd);
      unlink(path);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             path, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  // the query is over as soon as the accumulated row count hits the limit exactly
  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

7368 7369 7370 7371 7372 7373 7374
// Registry of the query handles that belong to one vgId; created by qOpenQueryMgmt() and
// released by qCleanupQueryMgmt().
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // id given to qOpenQueryMgmt(); used for the cache name and in log messages
  bool            closed;         // set by qQueryMgmtNotifyClosed(); once true, qRegisterQInfo() rejects new handles
  pthread_mutex_t lock;           // initialized in qOpenQueryMgmt(), destroyed in qCleanupQueryMgmt()
} SQueryMgmt;

7375
/*
 * Build a query handle from a query message.
 *
 * tsdb       storage engine handle, must not be NULL
 * vgId       id forwarded to initQInfo()
 * pQueryMsg  incoming query message, must not be NULL
 * pQInfo     out: receives the new handle; set to NULL whenever a
 *            non-success code is returned
 *
 * Returns TSDB_CODE_SUCCESS or the code of the step that failed.
 *
 * All intermediate objects produced while decoding the message are released
 * at the _over label. The four pointers passed to createQInfoImpl() are reset
 * to NULL right after that call so they are not freed here again
 * (presumably createQInfoImpl() takes ownership of them - confirm).
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond        = NULL;
  char            *tbnameCond     = NULL;
  SArray          *pTableIdList   = NULL;
  SSqlFuncMsg    **pExprMsg       = NULL;
  SSqlFuncMsg    **pSecExprMsg    = NULL;
  SExprInfo       *pExprs         = NULL;
  SExprInfo       *pSecExprs      = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  bool            isSTableQuery   = false;
  STableGroupInfo tableGroupInfo  = {0};
  int64_t         st              = 0;

  // step 1: decode the message into its individual parts
  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &pSecExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  // step 2: build the expression descriptors and the group-by descriptor
  code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->numOfOutput, &pExprs, pExprMsg, pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pSecExprMsg != NULL) {
    code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &pSecExprs, pSecExprMsg, pTagColumnInfo);
    if (code != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  // step 3: resolve the set of tables to be queried
  st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo);
    if (code != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    } else {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);
  }

  // step 4: reserve query buffer for the tables; abort when not enough is left
  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  // step 5: create and initialize the handle
  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, pSecExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);

  // these must not be released at _over any more
  pTagColumnInfo = NULL;
  pGroupbyExpr   = NULL;
  pSecExprs      = NULL;
  pExprs         = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);
  free(pExprs);
  free(pSecExprs);

  free(pExprMsg);
  free(pSecExprMsg);

  taosArrayDestroy(pTableIdList);

  for (int32_t c = 0; c < pQueryMsg->numOfCols; c++) {
    SColumnInfo* pCol = &pQueryMsg->colList[c];
    freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
  }

  // on failure the handle has already been freed by initQInfo, but *pQInfo may still hold the stale pointer
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
7523
/*
 * Print the cost summary of a query and release its handle.
 * Handles that fail isValidQInfo() are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

7534 7535 7536 7537 7538 7539
/*
 * Mark the result of the current execution round as ready and give up
 * ownership of the handle.
 *
 * Returns the value of needBuildResAfterQueryComplete(), evaluated while the
 * handle lock is held.
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool needBuild = needBuildResAfterQueryComplete(pQInfo);

  // The owner has to be cleared inside the critical section: once the handle is put into a task
  // to be executed, another thread may run ahead of the current one.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  // wakes up the waiter of the blocking retrieve model
  tsem_post(&pQInfo->ready);

  return needBuild;
}

7554
/*
 * Execute one round of the query behind the given handle.
 *
 * The calling thread first claims ownership of the handle; if another thread
 * already owns it, TSDB_CODE_QRY_IN_EXEC is stored in the handle and false is
 * returned. In all other cases the return value is that of doBuildResCheck().
 *
 * Errors raised during execution arrive through longjmp to the setjmp point
 * below; the code is recorded in pQInfo->code.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t threadId = taosGetPthreadId();

  // claim the handle: only succeeds when nobody owns it at the moment
  int64_t curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId);
  if (curOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) curOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  pQInfo->startExecTs = taosGetTimestampSec();

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t ret = setjmp(pQInfo->runtimeEnv.env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  // dispatch to the executor that matches the kind of query
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

7612
/*
 * Find out whether the result of a query can be built right now.
 *
 * qinfo        query handle
 * buildRes     out: true when the caller may build the response immediately
 * pRspContext  context of the retrieve request; stored in the handle when
 *              the result is not ready yet (always stored in blocking mode)
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the
 * code currently stored in the handle.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  if (tsRetrieveBlockingModel) {
    // blocking model: wait here until the executing thread posts the semaphore
    pQInfo->rspContext = pRspContext;
    tsem_wait(&pQInfo->ready);
    *buildRes = true;
    return pQInfo->code;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);

  assert(pQInfo->rspContext == NULL);
  if (pQInfo->dataReady != QUERY_RESULT_READY) {
    // not ready: remember the request so that it can be answered later
    *buildRes = false;
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  } else {
    *buildRes = true;
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%s", pQInfo, pQuery->rowSize,
           pQuery->rec.rows, tstrerror(pQInfo->code));
  }

  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
7656

7657
/*
 * Allocate the retrieve response and fill it with the current result set.
 *
 * qinfo         query handle
 * pRsp          out: response allocated with rpcMallocCont()
 * contLen       out: total length of the response in bytes
 * continueExec  out: false when the query is killed or over, true otherwise
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle,
 * TSDB_CODE_QRY_OUT_OF_MEMORY when the response cannot be allocated, and the
 * code stored in the handle otherwise.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload = result rows + an int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosHashGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *pOut = *pRsp;

  pOut->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query; the elapsed time always is
  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    pOut->offset = 0;
  } else {
    pOut->offset = htobe64(pQuery->limit.offset);
  }
  pOut->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);

  pOut->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, pOut->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    // here current thread hold the refcount, so it is safe to free tsdbQueryHandle.
    *continueExec = false;
    pOut->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
7711

7712 7713 7714 7715 7716 7717 7718 7719
/*
 * Tell whether a query has finished.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1
 * when the query has been killed or its status contains QUERY_OVER, else 0.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  if (isQueryKilled(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
7723
/*
 * Flag a query as killed and wait until no thread owns its handle any more.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise
 * TSDB_CODE_SUCCESS once the owner field has become 0.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread to stop.
  // Once the query is stopped, the owner of the qHandle is cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756
/*
 * Write one tag value into the result buffer.
 *
 * output  destination inside the result buffer
 * val     source value; NULL produces the NULL representation of the type
 * type    data type of the value
 * bytes   width of the value, used for every type except BINARY/NCHAR
 *
 * BINARY and NCHAR values are copied with the length reported by
 * varDataTLen(val); all other types copy exactly bytes bytes.
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7757 7758 7759
/*
 * Produce the result of a query that only touches tags / table names.
 *
 * Nothing is done when there is no table group. Otherwise the single group is
 * processed according to the function of the first output expression:
 *
 *  - TSDB_FUNC_TID_TAG: one var-length row per table, laid out as
 *      [int16_t 0][uid][tid][vgId][table name or tag value]
 *  - TSDB_FUNC_COUNT:   a single int64_t holding the number of tables
 *  - anything else:     one row per table with the requested tag values and/or
 *                       table name, honoring limit and offset
 *
 * pQInfo->tableIndex keeps the scan position, so at most rec.capacity (or
 * limit) rows are produced per call. On return rec.rows holds the number of
 * rows produced and the query status is QUERY_COMPLETED.
 *
 * The fixed-width header fields are written with memcpy because the row
 * position inside the result buffer carries no alignment guarantee.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pExpr1[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pExpr1[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    // prefer the type/width recorded in the tag column list when the column is found there
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = (int64_t) id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = (int32_t) id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = (int32_t) pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    int64_t numOfTables = (int64_t) num;
    memcpy(pQuery->sdata[0]->data, &numOfTables, sizeof(numOfTables));

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // a non-negative limit below the buffer capacity caps the number of rows of this round
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pExpr1;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7890
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7891 7892 7893 7894
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7895
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7896 7897
}

H
Haojun Liao 已提交
7898
/*
 * Try to reserve query buffer for numOfTables tables.
 *
 * tsQueryBufferSize < 0 : no accounting is done, the request always succeeds
 * tsQueryBufferSize == 0: query processing is disabled, the request fails
 * tsQueryBufferSize > 0 : the estimate of getQuerySupportBufSize() is
 *                         subtracted with a compare-and-swap loop; the request
 *                         fails when less than the estimate is left
 *
 * Returns TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER.
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  if (tsQueryBufferSize <= 0) {
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  for (;;) {
    int64_t avail = tsQueryBufferSize;
    int64_t left = avail - required;

    if (left < 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry when another thread changed the counter in the meantime
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, avail, left) == avail) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

H
Haojun Liao 已提交
7921
/*
 * Give back the query buffer estimate for numOfTables tables.
 * Nothing is done while tsQueryBufferSize is zero or negative.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize > 0) {
    int64_t released = getQuerySupportBufSize(numOfTables);

    // add the estimate back to the amount of available buffer
    atomic_add_fetch_64(&tsQueryBufferSize, released);
  }
}

7932 7933 7934 7935 7936 7937 7938
/*
 * Return the response context currently stored in the handle.
 * The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pHandle = (SQInfo*) qinfo;
  assert(pHandle != NULL);

  void* pContext = pHandle->rspContext;
  return pContext;
}

7939 7940 7941 7942 7943 7944 7945
/*
 * Release callback handed to taosCacheInit() in qOpenQueryMgmt(): kills the
 * query stored in the cache slot and destroys its handle.
 *
 * qhandle  pointer to the slot that holds the handle; NULL slots and slots
 *          holding NULL are ignored
 */
void freeqinfoFn(void *qhandle) {
  void** pSlot = qhandle;

  if (pSlot != NULL && *pSlot != NULL) {
    qKillQuery(*pSlot);
    qDestroyQueryInfo(*pSlot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7950
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7951 7952 7953 7954

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7955
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7956 7957 7958 7959
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7960

S
TD-1530  
Shengliang Guan 已提交
7961
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7962 7963 7964 7965
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7966 7967

  qDebug("vgId:%d, open querymgmt success", vgId);
7968
  return pQueryMgmt;
7969 7970
}

H
Haojun Liao 已提交
7971
/*
 * Callback handed to taosCacheRefresh() in qQueryMgmtNotifyClosed(): kills
 * the query stored in the given cache slot.
 *
 * handle  pointer to the slot that holds the query handle; NULL slots and
 *         slots holding NULL are ignored, as in freeqinfoFn()
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL || *fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7977 7978 7979 7980 7981 7982 7983
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7984
//  pthread_mutex_lock(&pQueryMgmt->lock);
7985
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7986
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7987

H
Haojun Liao 已提交
7988
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005
}

/**
 * Destroy a query manager: clean up its handle cache, destroy its mutex and
 * free the manager itself.
 *
 * The manager must already be flagged as closed (checked with assert).
 *
 * @param pQMgmt  query manager (SQueryMgmt*); a NULL argument is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  // remember the id now: it is logged after the manager has been freed
  const int32_t vgId = pMgmt->vgId;

  assert(pMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  tfree(pMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}

8011
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
8012
  if (pMgmt == NULL) {
8013
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
8014 8015 8016 8017 8018
    return NULL;
  }

  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
8019
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
8020
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
8021 8022 8023
    return NULL;
  }

H
Haojun Liao 已提交
8024
//  pthread_mutex_lock(&pQueryMgmt->lock);
8025
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
8026
//    pthread_mutex_unlock(&pQueryMgmt->lock);
8027
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
8028
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
8029 8030
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
8031
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
8032 8033
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE),
        (getMaximumIdleDurationSec()*1000));
H
Haojun Liao 已提交
8034
//    pthread_mutex_unlock(&pQueryMgmt->lock);
8035 8036 8037 8038 8039

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
8040
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
8041 8042
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
8043 8044 8045 8046 8047 8048 8049
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
8050 8051 8052
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
8053 8054
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
8055
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
8056
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
8057 8058 8059 8060 8061 8062
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
8063
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
8064 8065 8066 8067 8068
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
8069
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
8070 8071 8072
  return 0;
}

8073