qExecutor.c 245.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
// upper bound on rows per result-buffer page: (1 << 12) - 1 = 4095
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
37
// true when p and s share at least one set bit (a bit-overlap test, not strict equality)
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
38 39
// true when the direction factor derived from q->order.order equals the ascending forward step
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

40
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
41
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
42
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
43
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
44

H
Haojun Liao 已提交
45
// recover the enclosing SQInfo from a pointer x to its runtimeEnv member (container-of via offsetof);
// x must really point at the runtimeEnv field of an SQInfo
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
46

47
// row position reached from query->pos after advancing index rows in direction step
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
48
// flip n in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC; n is evaluated twice, so it must be a
// side-effect-free lvalue
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
49

H
Haojun Liao 已提交
50 51
// compound literal yielding a zero-initialized SDataBlockInfo
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey pair of the time window _src into _dst. Both arguments
 * are struct-valued expressions (not pointers); each is evaluated twice.
 *
 * The expansion deliberately has no trailing semicolon after `while (0)`, so
 * that `TIME_WINDOW_COPY(a, b);` is exactly one statement and can be used as
 * the body of an unbraced if/else. The arguments are parenthesized so that
 * expressions such as `*p` expand correctly.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
/* Bit flags describing the execution state of a query. */
enum {
  /* set as soon as the query starts to execute */
  QUERY_NOT_COMPLETED = 1u << 0,

  /*
   * The result output buffer is full and the current query is paused.
   * Per the original note, this state only occurs for group-by queries and
   * for diff/add/division/multiply style queries.
   */
  QUERY_RESBUF_FULL = 1u << 1,

  /*
   * The query is over:
   * 1. used by queries that produce a single result row, e.g.
   *    count/sum/first/last/avg;
   * 2. also used once all data within the queried time window is handled.
   */
  QUERY_COMPLETED = 1u << 2,

  /*
   * Per the original note: used when the result has not been completely
   * returned to the client, usually for interval queries with the
   * interpolation option.
   */
  QUERY_OVER = 1u << 3,
};
77 78

/* Result codes for comparing timestamps/tags (the names suggest the ts-join path). */
enum {
  TS_JOIN_TS_EQUAL = 0,
  TS_JOIN_TS_NOT_EQUALS = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Disabled fault-injection allocators. When enabled, they replace
 * malloc/calloc/realloc in this file and randomly return NULL so that the
 * out-of-memory paths can be exercised.
 */

/* fails when rand() is a multiple of 1000 */
static UNUSED_FUNC void *u_malloc (size_t size) {
  uint32_t r = rand();
  return (r % 1000 == 0) ? NULL : malloc(size);
}

/* fails when rand() is a multiple of 1000 */
static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t r = rand();
  return (r % 1000 == 0) ? NULL : calloc(num, size);
}

/* fails when rand() % 5 is 0 or 1, i.e. for two out of five remainders */
static UNUSED_FUNC void* u_realloc(void* p, size_t size) {
  uint32_t r = rand();
  return (r % 5 <= 1) ? NULL : realloc(p, size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
// clear the status bits st from q->status
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
// number of entries in q->tableqinfoGroupInfo.pGroupList
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
// the _index-th entry of q->tableqinfoGroupInfo.pGroupList, viewed as an SArray*
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
// a query is treated as an interval query when its interval length is positive
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Advance the time window tw to the next window in scan direction.
 *
 * For fixed-length units the start key moves by interval.sliding (sign taken
 * from the scan direction) and the inclusive end key is skey + interval - 1.
 *
 * For the calendar units 'n' and 'y' the start key is converted to seconds,
 * broken down in local time, shifted by the interval in months (a 'y'
 * interval is multiplied by 12), and converted back with mktime(). The end
 * key is one further interval ahead, minus one tick, so it stays inclusive.
 *
 * @param pQuery  supplies order, interval settings and the time precision
 * @param tw      window updated in place
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to whole seconds: /1000 once for the base precision, once more for microseconds
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  // months counted from the tm epoch; tm_year/tm_mon are rebuilt from this value
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before scaling: time_t * long overflows where both are 32 bits wide
  tw->skey = (int64_t)mktime(&tm) * 1000;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  // make the end key inclusive
  tw->ekey -= 1;
}

// thin wrapper: advance the time window tw of query _q via getNextTimeWindow()
#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))
H
Haojun Liao 已提交
174

175 176
// mark the query over all tables as finished by moving tableIndex to the number of tables
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
// true once tableIndex has reached the number of tables
// NOTE(review): "STASBLE" looks like a misspelling of "STABLE"; the name is kept as-is for existing callers
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

H
hjxilinx 已提交
181
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
182
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
183 184
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
187
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
188

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
197 198
static int32_t checkForQueryBuf(int32_t numOfTables);
static void releaseQueryBuf(int32_t numOfTables);
199

200
/*
 * Decide whether the row at elemPos passes all column filters of the query.
 *
 * A column passes as soon as one of its filter elements accepts the value
 * (the elements of one column are OR-ed); the row passes only when every
 * filtered column passes (the columns are AND-ed).
 *
 * NULL handling per filter element:
 *   - NULL value:     only an isNull_filter element accepts it;
 *   - non-NULL value: a notNull_filter element accepts it, an isNull_filter
 *                     element rejects it, any other element is evaluated
 *                     through its fp callback.
 *
 * @return true when the row qualifies, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pCol = &pQuery->pFilterInfo[col];
    char *pVal = (char*)pCol->pData + pCol->info.bytes * elemPos;

    bool    matched = false;
    int32_t idx = 0;

    while (!matched && idx < pCol->numOfFilters) {
      SColumnFilterElem *pCond = &pCol->pFilters[idx++];

      if (isNull(pVal, pCol->info.type)) {
        matched = (pCond->fp == isNull_filter);
      } else if (pCond->fp == notNull_filter) {
        matched = true;
      } else if (pCond->fp != isNull_filter) {
        if (pCond->fp(pCond, pVal, pVal)) {
          matched = true;
        }
      }
    }

    if (!matched) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output columns of the query.
 *
 * When the query has a main output (hasMainOutput), the ts, tag and tagprj
 * functions are ignored: they cannot decide the number of output rows, which
 * is determined by the main output instead.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  const bool skipAuxiliary = hasMainOutput(pQuery);

  int64_t rows = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

    if (skipAuxiliary && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pInfo == NULL) {
      continue;
    }

    if (rows < pInfo->numOfRes) {
      rows = pInfo->numOfRes;
    }
  }

  assert(rows >= 0);
  return rows;
}

268 269 270 271 272
/*
 * The number of results has to be updated after the offset value changed:
 * overwrite numOfRes of every output column with the given value.
 *
 * Columns produced by the ts, tag, tagprj and ts_dummy functions are left
 * untouched. For all others the stored count must be larger than numOfRes.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SResultInfo    *pInfo = GET_RES_INFO(pCtx);

    int16_t fid = pCtx->functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ ||
                         fid == TSDB_FUNC_TS_DUMMY);

    if (!auxiliary) {
      assert(pInfo->numOfRes > numOfRes);
      pInfo->numOfRes = numOfRes;
    }
  }
}

H
Haojun Liao 已提交
288
/* Map a group index to its result id: 20000000 plus 10000 per group. */
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t firstId = 20000000;
  const int32_t idsPerGroup = 10000;

  return firstId + (groupIndex * idsPerGroup);
}

/*
 * Tell whether the group-by clause contains a normal column.
 *
 * Returns false for a NULL expression or one without group columns. When a
 * normal column is found among several group columns, its colIndex is
 * asserted to be positive (per the original note: the normal column sits at
 * the second position if tbname is part of the group-by clause).
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  int32_t i = 0;
  while (i < pGroupbyExpr->numOfGroupCols) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, i);
    ++i;

    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // with several group columns the normal column must not be the first one
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by clause.
 *
 * The column id of that group-by column is looked up in pQuery->colList.
 * TSDB_DATA_TYPE_NULL is returned when no entry of colList matches; if the
 * clause holds no normal column, the lookup runs with the sentinel id -2.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // step 1: column id of the first normal group-by column (-2: none found)
  int32_t targetId = -2;
  int32_t g = 0;
  while (g < pGroupbyExpr->numOfGroupCols) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      targetId = pCol->colId;
      break;
    }
    ++g;
  }

  // step 2: resolve the id against the columns loaded by the query
  int32_t c = 0;
  while (c < pQuery->numOfCols) {
    if (pQuery->colList[c].colId == targetId) {
      return pQuery->colList[c].type;
    }
    ++c;
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * A query counts as a "selectivity with tags" query when its output holds
 * at least one tag_dummy/ts_dummy column together with at least one function
 * whose status carries the TSDB_FUNCSTATE_SELECTIVITY bit.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool sawDummyTag = false;
  bool sawSelectivity = false;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      sawDummyTag = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      sawSelectivity = true;
    }
  }

  return sawSelectivity && sawDummyTag;
}

360 361 362 363 364 365 366 367 368 369 370
/*
 * A projection query outputs nothing but prj/tagprj columns. A query without
 * any output column is reported as a projection query as well.
 */
bool isProjQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;
    bool    projection = (fid == TSDB_FUNC_PRJ || fid == TSDB_FUNC_TAGPRJ);

    if (!projection) {
      return false;
    }

    ++i;
  }

  return true;
}

371
/* True when the first output column is produced by the ts_comp function. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFid = pQuery->pSelectExpr[0].base.functionId;
  return firstFid == TSDB_FUNC_TS_COMP;
}
372

373 374 375
/*
 * Enforce the LIMIT clause on the rows produced by the current round.
 *
 * When a positive limit is set and the rows returned so far plus the rows of
 * this round exceed it, rec.rows is cut down to the remaining quota and the
 * query is flagged QUERY_COMPLETED.
 *
 * @return true when the row count was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit <= 0) {
    return false;
  }

  if (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  // keep only as many rows as the limit still allows
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any output column other than a ts column is a top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;
    ++i;

    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
/*
 * Tell whether the query needs tag values for its output.
 *
 * That is the case for a single-column ts_comp query (per the original note,
 * the ts_comp column requires the tag value for the join filter) and for any
 * query with at least one output column flagged as a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  if (pQuery->numOfOutput == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // tag values by which the results are aggregated
  int32_t i = 0;
  while (i < pQuery->numOfOutput) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[i++];
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

423 424 425 426 427 428 429 430
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
431
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
432
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
433 434
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
435 436
  } else {
    *pColStatis = NULL;
437
  }
438

H
Haojun Liao 已提交
439
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
440 441 442
    return false;
  }

443 444 445
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
446

447 448 449 450
  return true;
}

/*
 * Look up, or create during the master scan, the window result slot that
 * belongs to the key pData, and make it the current slot.
 *
 * The key is resolved through pWindowResInfo->hashList. On a miss a new slot
 * is appended; the result array is grown first when it is full (x1.25 above
 * 10000 slots, x1.5 otherwise, and always by at least one slot).
 *
 * @param pData       key bytes identifying the window/group
 * @param bytes       length of the key
 * @param masterscan  only the master scan may add new slots
 * @return the slot for the key, or NULL when the key is unknown and
 *         masterscan is false
 *
 * Does not return on failure: longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails, and with
 * TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when the number of slots exceeds
 * MAX_INTERVAL_TIME_WINDOW.
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // the factors truncate to the old value for a capacity of 0 or 1; force real growth so that
      // the slot taken below always lies inside the array
      if (newCap <= pWindowResInfo->capacity) {
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCap * sizeof(SWindowResult)));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // account for the new slots only after the allocation succeeded
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCap - pWindowResInfo->capacity);

      pWindowResInfo->pResult = (SWindowResult *)t;

      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      // initialize every newly added slot
      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The candidate is the window starting at prevSKey when no window is active
 * yet (curIndex == -1), otherwise the currently active window. If ts falls
 * outside the candidate, the window is re-aligned:
 *   - calendar units ('n'/'y'): truncate ts to the interval boundary;
 *   - fixed units: move the start key by whole multiples of interval.sliding
 *     until the window contains ts.
 * Window end keys are inclusive, i.e. start of the next window minus one.
 *
 * @param pWindowResInfo  supplies curIndex, prevSKey and the active window
 * @param ts              timestamp being processed
 * @param pQuery          supplies interval settings, precision, order and the
 *                        query time range
 * @return the window for ts; its ekey is clipped to the query end for
 *         ascending queries, its skey is never clipped
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // taosTimeAdd() yields the start of the next window; subtract one tick to keep ekey inclusive,
      // matching the re-alignment branch below
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SWindowResult* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // window starts after ts: step back by enough whole slides
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // window ends before ts: step forward by enough whole slides
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

/*
 * Reserve one row in the disk-based result buffer for a window result that
 * has no position yet (pos.pageId == -1).
 *
 * The row is taken from the last page registered for sid. A fresh page is
 * requested when sid owns no page yet or when the last page already holds
 * numOfRowsPerPage rows.
 *
 * @return 0 on success or when the window already owns a position, -1 when
 *         no page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  tFilePage *pPage = NULL;
  int32_t    id = -1;

  SIDList pages = getDataBufPagesIdList(pResultBuf, sid);
  if (taosArrayGetSize(pages) != 0) {
    SPageInfo* pLast = getLastPageInfo(pages);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    id = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, sid, &id);
      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &id);
  }

  if (pPage == NULL) {
    return -1;
  }

  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // not allocated yet: take the next free row of the page
  pWindowRes->pos.pageId = id;
  pWindowRes->pos.rowId = (int32_t)(pPage->num++);

  assert(pWindowRes->pos.pageId >= 0);
  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
604
                                       STimeWindow *win, bool masterscan, bool* newWind) {
605 606
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
607

608 609
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
610
  if (pWindowRes == NULL) {
611 612 613
    *newWind = false;

    return masterscan? -1:0;
614
  }
615

616
  *newWind = true;
H
Haojun Liao 已提交
617

618 619 620
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
621
    if (ret != TSDB_CODE_SUCCESS) {
622 623 624
      return -1;
    }
  }
625

626
  // set time window for current result
627
  pWindowRes->win = (*win);
628

H
Haojun Liao 已提交
629
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
630 631 632
  return TSDB_CODE_SUCCESS;
}

633
/* Report whether the window result in the given slot is closed; slot must lie in [0, size). */
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pSlot = &pWindowResInfo->pResult[slot];
  return pSlot->closed;
}

H
Haojun Liao 已提交
638
/*
 * Count the rows of a block, starting at pos and moving in scan direction,
 * that belong to the window ending at ekey.
 *
 * Ascending order searches the rows [pos, numOfRows); descending order
 * searches the rows [0, pos]. The row located by searchFn is counted only
 * when its timestamp equals ekey. The result is asserted to be positive.
 *
 * @param pData  timestamp column of the block
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  const bool asc = (order == TSDB_ORDER_ASC);

  int32_t hit = asc ? searchFn((char*) &pData[pos], numOfRows - pos, ekey, order)
                    : searchFn((char *)pData, pos + 1, ekey, order);

  int32_t steps = 0;
  if (hit >= 0) {
    // absolute row of the hit: the ascending search is relative to pos
    int32_t row = asc ? (hit + pos) : hit;

    steps = asc ? hit : (pos - hit);
    if (pData[row] == ekey) {
      steps += 1;
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
669
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
670
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
671
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
672
    return pWindowResInfo->size;
673
  }
674

675
  // no qualified results exist, abort check
676
  int32_t numOfClosed = 0;
677

678
  if (pWindowResInfo->size == 0) {
679
    return pWindowResInfo->size;
680
  }
681

682
  // query completed
H
hjxilinx 已提交
683 684
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
685
    closeAllTimeWindow(pWindowResInfo);
686

687 688 689 690
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
691
    int64_t skey = TSKEY_INITIAL_VAL;
692

693 694
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
695
      if (pResult->closed) {
696
        numOfClosed += 1;
697 698
        continue;
      }
699

700
      TSKEY ekey = pResult->win.ekey;
701
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
702
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
703 704
        closeTimeWindow(pWindowResInfo, i);
      } else {
705
        skey = pResult->win.skey;
706 707 708
        break;
      }
    }
709

710
    // all windows are closed, set the last one to be the skey
711
    if (skey == TSKEY_INITIAL_VAL) {
712 713 714 715 716
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
717

718
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].win.skey;
719

720 721
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
722
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
723
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
724

725
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
726
    } else {
727
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
728
             numOfClosed);
729 730
    }
  }
731

732 733 734 735 736
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
737

738
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
739
  return numOfClosed;
740 741 742
}

/*
 * Count the rows of the block, from startPos in scan direction, that fall
 * into the time window ending at ekey.
 *
 * If the window ends inside the block, the count comes from
 * getForwardStepsInBlock(); otherwise every remaining row in scan direction
 * is counted. With updateLastKey set, the lastKey of the current table is
 * moved one step past the last counted timestamp (or past the block border
 * when the rest of the block is counted).
 *
 * @return the number of rows, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  const int32_t order = pQuery->order.order;
  const int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);
  const bool    asc   = QUERY_IS_ASC_QUERY(pQuery);

  STableQueryInfo* pTable = pQuery->current;

  // does the window end strictly inside this block?
  const bool endsInside = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  int32_t rows = -1;
  TSKEY   lastTs = 0;  // timestamp the new lastKey is derived from

  if (endsInside) {
    rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    if (updateLastKey) {
      lastTs = asc ? pPrimaryColumn[startPos + (rows - 1)] : pPrimaryColumn[startPos - (rows - 1)];
    }
  } else if (asc) {
    rows = pDataBlockInfo->rows - startPos;
    lastTs = pDataBlockInfo->window.ekey;
  } else {
    rows = startPos + 1;
    lastTs = pDataBlockInfo->window.skey;
  }

  if (updateLastKey) {  // update the last key
    pTable->lastKey = lastTs + step;
  }

  assert(rows > 0);
  return rows;
}

H
Haojun Liao 已提交
782 783
/*
 * Run every output function of the query over a range of rows in one call.
 *
 * Nothing is executed unless this is the master scan or the window is
 * closed. For each output the function context receives the window start,
 * the number of rows (forwardStep) and the first row of the range; functions
 * with the selectivity state additionally get the matching timestamp slice.
 *
 * Pre-aggregated block statistics are valid for whole blocks only, so the
 * isSet flag is switched off while a partial range (forwardStep < numOfTotal)
 * is processed; afterwards the flag of every context is set to the value the
 * first context had on entry.
 *
 * @param offset  row the range starts from, in scan direction
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  bool savedIsSet = pCtx[0].preAggVals.isSet;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  // for a descending scan the range ends at offset, so it starts forwardStep - 1 rows earlier
  const int32_t firstRow = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);
  const bool    partialBlock = forwardStep < numOfTotal;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOne = &pCtx[k];

    pOne->nStartQueryTimestamp = pWin->skey;
    pOne->size = forwardStep;
    pOne->startOffset = firstRow;

    int32_t fid = pQuery->pSelectExpr[k].base.functionId;
    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pOne->ptsList = &tsCol[pOne->startOffset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    // NOTE: the original value of isSet is changed here and restored below
    if (partialBlock && pOne->preAggVals.isSet) {
      pOne->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pOne, fid)) {
      aAggs[fid].xFunction(pOne);
    }

    // restore it
    pOne->preAggVals.isSet = savedIsSet;
  }
}

816
/**
 * Feed a single row of the current data block to every output function.
 *
 * Nothing is executed unless this is the master scan or the target time window is closed.
 *
 * @param pRuntimeEnv  runtime environment holding the query and its function contexts
 * @param closed       closed status of the time window result that receives the output
 * @param pWin         time window being processed; its skey is stored as nStartQueryTimestamp
 * @param offset       position of the row inside the data block
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pCtxList[i];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pFuncCtx, funcId)) {
      continue;
    }

    aAggs[funcId].xFunctionF(pFuncCtx, offset);
  }
}

H
Haojun Liao 已提交
832 833
/**
 * Advance pNext to the following time window and locate the first row of the block for it.
 *
 * @param pRuntimeEnv     runtime environment holding the query
 * @param pNext           in: current window; out: the next window (moved further when it covers no data)
 * @param pDataBlockInfo  description of the current data block (row count and key range)
 * @param primaryKeys     primary timestamp column of the block
 * @param searchFn        search function used to locate the start key inside the block
 * @param prevPosition    position of the last row of the previous window, or -1 when unknown
 * @return                start position of the next window in the block, or -1 if the next window
 *                        lies outside of the current block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  bool beyondBlock = ascending ? (pNext->skey > pDataBlockInfo->window.ekey)
                               : (pNext->ekey < pDataBlockInfo->window.skey);
  if (beyondBlock) {
    return -1;
  }

  // the key to search for is clipped by the start key of the query range
  TSKEY startKey;
  if (ascending) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  int32_t startPos = 0;
  bool    tumbling = (pQuery->interval.sliding == pQuery->interval.interval);
  if (tumbling && prevPosition != -1) {
    // tumbling time window query, a special case of sliding time window query:
    // the next window starts right after the previous one
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  TSKEY firstKey = primaryKeys[startPos];
  bool  emptyWindow = ascending ? (firstKey > pNext->ekey) : (firstKey < pNext->skey);
  if (emptyWindow) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      pNext->skey = taosTimeTruncate(firstKey, &pQuery->interval, pQuery->precision);
      pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else if (ascending) {
      pNext->ekey += ((firstKey - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
    } else {
      pNext->skey -= ((pNext->skey - firstKey + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
    }
  }

  return startPos;
}

H
Haojun Liao 已提交
893
/**
 * Clip the scan end key of a time window by the end key of the query range.
 *
 * Ascending query: the window ekey, but not larger than pQuery->window.ekey.
 * Descending query: the window skey, but not smaller than pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
910 911
/**
 * Look up the data of a column in a loaded data block by its column id.
 * The lookup is a linear scan (todo: binary search).
 *
 * @param pDataBlock  array of SColumnInfoData
 * @param colId       id of the wanted column
 * @return            the pData pointer of the first column with a matching id, NULL if there is none
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
925
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
926 927 928
  if (pDataBlock == NULL) {
    return NULL;
  }
929

H
Haojun Liao 已提交
930
  char *dataBlock = NULL;
H
Haojun Liao 已提交
931
  SQuery *pQuery = pRuntimeEnv->pQuery;
932

933
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
934
  if (functionId == TSDB_FUNC_ARITHM) {
935
    sas->pArithExpr = &pQuery->pSelectExpr[col];
936

937 938 939 940
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
941

H
Haojun Liao 已提交
942
    if (sas->data == NULL) {
H
Haojun Liao 已提交
943
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
944 945 946
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

947
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
948
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
949
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
950
      SColumnInfo *pColMsg = &pQuery->colList[i];
951

952 953 954 955 956 957 958 959
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
960

961
      assert(dataBlock != NULL);
962
      sas->data[i] = dataBlock;  // start from the offset
963
    }
964

965
  } else {  // other type of query function
966
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
967
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
968 969 970 971 972
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
973 974
    } else {
      dataBlock = NULL;
975 976
    }
  }
977

978 979 980 981
  return dataBlock;
}

/**
H
Haojun Liao 已提交
982
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
983 984
 * @param pRuntimeEnv
 * @param forwardStep
985
 * @param tsCols
986 987 988 989 990
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
991
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
992 993
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
994
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
995 996
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

997 998
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
999
  if (pDataBlock != NULL) {
1000
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
1001
    tsCols = (TSKEY *)(pColInfo->pData);
1002
  }
1003

1004
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1005
  if (sasArray == NULL) {
H
Haojun Liao 已提交
1006
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
1007 1008
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1009

H
Haojun Liao 已提交
1010
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1011
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1012
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1013
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1014
  }
1015

1016
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1017
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1018
    TSKEY ts = TSKEY_INITIAL_VAL;
1019

H
Haojun Liao 已提交
1020 1021 1022 1023 1024 1025 1026 1027
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1028
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1029 1030
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
1031
      taosTFree(sasArray);
H
hjxilinx 已提交
1032
      return;
1033
    }
1034

H
Haojun Liao 已提交
1035 1036 1037
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1038
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1039
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1040
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1041

1042
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1043
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1044
    }
1045

1046 1047
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1048

1049
    while (1) {
H
Haojun Liao 已提交
1050 1051
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1052 1053 1054
      if (startPos < 0) {
        break;
      }
1055

1056
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1057
      hasTimeWindow = false;
H
Haojun Liao 已提交
1058 1059
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1060 1061
        break;
      }
1062

1063 1064 1065 1066 1067
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1068
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1069

1070 1071
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1072
    }
1073

1074 1075 1076 1077 1078 1079 1080
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1081
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1082
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1083 1084 1085 1086 1087
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1088

1089 1090 1091 1092
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1093

S
Shengliang Guan 已提交
1094
    taosTFree(sasArray[i].data);
1095
  }
1096

S
Shengliang Guan 已提交
1097
  taosTFree(sasArray);
1098 1099 1100 1101 1102 1103
}

/**
 * Select the output buffer of the group that the given group-by column value belongs to.
 *
 * @param pRuntimeEnv  runtime environment holding the window results and the result buffer
 * @param pData        value of the group-by column for the current row
 * @param type         data type of the group-by column
 * @param bytes        width of the group-by column
 * @return             TSDB_CODE_SUCCESS if the output buffer is set, -1 if the value is null or
 *                     no result slot/buffer page could be obtained
 *
 * Float/double group-by columns abort the query through longjmp with TSDB_CODE_QRY_APP_ERROR;
 * a failed key allocation aborts it with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // variable length values are keyed by their payload only
  char* d = pData;
  int16_t len = bytes;
  if (type == TSDB_DATA_TYPE_BINARY||type == TSDB_DATA_TYPE_NCHAR) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float/binary/nchar columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true);
  if (pWindowRes == NULL) {
    return -1;
  }

  int64_t v = -1;
  switch(type) {
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
    case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
    case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
    case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
    default: break;  // any other type keeps -1
  }

  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    // This function runs for every row, so the same result is visited again for each row of a
    // group. Copy the key only once instead of allocating (and leaking) a new copy per row.
    // NOTE(review): relies on key being NULL for a freshly created window result - confirm.
    if (pWindowRes->key == NULL) {
      pWindowRes->key = malloc(varDataTLen(pData));
      if (pWindowRes->key == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pWindowRes->key, pData);
    }
  } else {
    pWindowRes->win.skey = v;
    pWindowRes->win.ekey = v;
  }

  assert(pRuntimeEnv->windowResInfo.interval == 0);

  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1158
/**
 * Find the data of the group-by column in the loaded data block.
 *
 * Tag columns in the group-by list are skipped. For a non-tag column its type and width are
 * reported through the out parameters, then the block is searched for a column with that id.
 *
 * @param pQuery      query description (group-by expression and column list)
 * @param type        out: data type of the group-by column
 * @param bytes       out: width of the group-by column
 * @param pDataBlock  array of SColumnInfoData
 * @return            data of the group-by column, NULL if the block does not contain it
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // position of the group-by column in the column list of the query
    int32_t colId = pGroupCol->colId;
    int16_t slot = -1;

    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pQuery->colList[i].colId == colId) {
        slot = (int16_t)i;
        break;
      }
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

    for (int32_t c = 0; c < numOfBlockCols; ++c) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, c);
      if (pGroupCol->colId == pColData->info.colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/**
 * Check a row against the current element of the timestamp buffer of a join query.
 *
 * @param pRuntimeEnv  runtime environment holding the query, the timestamp buffer and the contexts
 * @param offset       position of the row inside the data block
 * @return             TS_JOIN_TAG_NOT_EQUALS if the tag of the first context differs from the tag
 *                     of the buffer element, TS_JOIN_TS_NOT_EQUALS if the row has not reached the
 *                     timestamp of the element yet, TS_JOIN_TS_EQUAL if the timestamps match
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  // timestamp of the row, read from the input of the first context
  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is handed to tVariantCompare as a pointer above, so its members are reached with ->
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag->i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  // a row lying beyond the buffer element in scan direction is not expected
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether a function has to be executed for the current scan.
 *
 * @param pRuntimeEnv  runtime environment (query and scan flag)
 * @param pCtx         context of the function
 * @param functionId   id of the function
 * @return             true if the function should be invoked
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery      *pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // finished functions and the dummy tag/ts functions are never executed
  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (pResInfo->complete || isDummy) {
    return false;
  }

  // first/first_dst only run when the query order is ascending
  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST) || (functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  // last/last_dst only run when the first parameter equals the query order
  bool isLast = (functionId == TSDB_FUNC_LAST_DST) || (functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, only the functions handled above need to be executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1265 1266
/**
 * Apply all output functions to the current data block row by row.
 *
 * Every row is checked against the timestamp buffer of a join query (if any) and the column
 * filters, and is then dispatched to the time window(s) or the group it belongs to.
 * After the scan the last key of the current table is moved past the last visited row.
 *
 * @param pRuntimeEnv     runtime environment holding the query and its function contexts
 * @param pStatis         pre-aggregated statistics of the block, forwarded to setExecParams
 * @param pDataBlockInfo  description of the current data block (rows, key range, table id)
 * @param pWindowResInfo  time window results of the query
 * @param pDataBlock      array of SColumnInfoData holding the loaded block data
 *
 * On allocation failure the query is aborted through longjmp with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pTableInfo = pQuery->current;
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
  bool groupByNormalCol = pRuntimeEnv->groupbyNormalCol;

  // the first column serves as the timestamp list only if it really is a timestamp column
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY*) pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByNormalCol) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind the input of every output function
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
  }

  // set the input column data
  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[f];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t joinRes = doTSJoinFilter(pRuntimeEnv, offset);
      if (joinRes == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (joinRes == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(joinRes == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t code = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);
      if (code != TSDB_CODE_SUCCESS || !hasTimeWindow) {  // null data, too many state code
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // the row may fall into the following windows as well; the window index is restored afterwards
      STimeWindow nextWin = win;
      int32_t     savedIndex = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool outOfRange = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey > pQuery->window.ekey)
                                                     : (nextWin.skey < pQuery->window.ekey);
        if (outOfRange) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByNormalCol) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t code = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (code != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  // move the last key of the table past the last visited row
  assert(offset >= 0);
  if (tsCols == NULL) {
    TSKEY blockEnd = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    pTableInfo->lastKey = blockEnd + step;
  } else {
    pTableInfo->lastKey = tsCols[offset] + step;
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    pTableInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // todo refactor: extract method
  // only arithmetic expressions own a data pointer array
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      taosTFree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1432
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1433
  SQuery *pQuery = pRuntimeEnv->pQuery;
1434

H
hjxilinx 已提交
1435 1436
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1437

H
Haojun Liao 已提交
1438
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1439
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1440
  } else {
1441
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1442
  }
1443

1444
  // update the lastkey of current table
1445
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1446
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1447

1448
  // interval query with limit applied
1449
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1450
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1451 1452
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1453
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1454

1455 1456 1457 1458
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1459

1460 1461 1462
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1463

1464 1465 1466
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1467 1468 1469 1470 1471

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1472
    }
1473
  }
1474

1475
  return numOfRes;
1476 1477
}

H
Haojun Liao 已提交
1478
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1479
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1480

1481 1482
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1483

1484
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1485
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1486
  pCtx->aInputElemBuf = inputData;
1487

1488
  if (tpField != NULL) {
H
Haojun Liao 已提交
1489
    pCtx->preAggVals.isSet  = true;
1490 1491
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1492 1493 1494
  } else {
    pCtx->preAggVals.isSet = false;
  }
1495

H
Haojun Liao 已提交
1496 1497
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1498 1499
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1500

H
Haojun Liao 已提交
1501
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1502 1503
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1504

1505 1506
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1507
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1508
  }
1509

1510 1511 1512 1513 1514
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1515
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1516
    /*
H
Haojun Liao 已提交
1517
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1518 1519 1520 1521 1522 1523 1524 1525 1526 1527
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1528

1529 1530
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1531 1532 1533 1534 1535 1536
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1537 1538
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1539
    pInterpInfo->type = (int8_t)pQuery->fillType;
1540 1541
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1542

1543 1544 1545 1546
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1547 1548 1549
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1550 1551
      }
    }
H
Haojun Liao 已提交
1552 1553 1554
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1555
  }
1556

1557 1558 1559 1560 1561 1562
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1563
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1564 1565 1566
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1567
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1568 1569 1570 1571 1572 1573
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1574
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1575 1576
  SQuery* pQuery = pRuntimeEnv->pQuery;

1577
  if (isSelectivityWithTagsQuery(pQuery)) {
1578
    int32_t num = 0;
1579
    int16_t tagLen = 0;
1580

1581
    SQLFunctionCtx *p = NULL;
1582
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1583 1584 1585
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1586

1587
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1588
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1589

1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1603 1604 1605 1606 1607
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1608
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1609
    }
1610
  }
H
Haojun Liao 已提交
1611 1612

  return TSDB_CODE_SUCCESS;
1613 1614
}

H
Haojun Liao 已提交
1615 1616
/*
 * Hand out consecutive slices of 'buf' to the result-info entries of the output columns:
 * entry i is bound (through setResultInfoBuf) to pQuery->pSelectExpr[i].interBytes bytes that
 * start right behind the slice of entry i-1. 'buf' must therefore hold at least the sum of all
 * interBytes values.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char   *cursor = buf;
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t interBytes = pQuery->pSelectExpr[col].interBytes;

    setResultInfoBuf(pResultInfo + col, interBytes, isStableQuery, cursor);

    cursor += interBytes;
    ++col;
  }
}

1625
/*
 * Allocate and initialise the per-output-column state of the runtime environment.
 *
 * Two allocations are made:
 *   - pRuntimeEnv->resultInfo: numOfOutput SResultInfo entries followed by interBufSize bytes
 *     that are split among the columns as intermediate result buffers;
 *   - pRuntimeEnv->pCtx: one zero-initialised SQLFunctionCtx per output column.
 *
 * For every output column the function context gets its input type/bytes (from the table-name
 * schema, the tag column list, the first function argument or the data column list, depending
 * on the column flags), its output type/bytes, the scan order, the function id and a copy of the
 * function parameters. For top/bottom/diff, 'order' and the function id are additionally stored
 * in param[2]/param[3] and the order-by column id in param[1]. pRuntimeEnv->offset[i] is set to
 * the running sum of the output bytes of the columns before i.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY after releasing both allocations
 * when an allocation or setCtxTagColumnInfo() fails.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // the SResultInfo array and the intermediate buffers share one allocation
  size_t resInfoBytes = pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, pRuntimeEnv->interBufSize + resInfoBytes);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SExprInfo      *pExpr = &pQuery->pSelectExpr[i];
    SSqlFuncMsg    *pFuncMsg = &pExpr->base;
    SColIndex      *pColInfo = &pFuncMsg->colInfo;
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    // remember the "require null" request in the context and strip it from the column flag
    pCtx->requireNull = TSDB_COL_REQ_NULL(pColInfo->flag) ? true : false;
    if (pCtx->requireNull) {
      pColInfo->flag &= ~(TSDB_COL_NULL);
    }

    // resolve type and width of the input column
    int32_t colIndex = pColInfo->colIndex;
    if (TSDB_COL_IS_TAG(pColInfo->flag)) {
      if (pColInfo->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema tbnameSchema = tGetTableNameColumnSchema();

        pCtx->inputBytes = tbnameSchema.bytes;
        pCtx->inputType = tbnameSchema.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[colIndex].bytes;
        pCtx->inputType = pQuery->tagColList[colIndex].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pColInfo->flag)) {
      // user-defined constant column: described by the first function argument
      pCtx->inputBytes = pFuncMsg->arg[0].argBytes;
      pCtx->inputType = pFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[colIndex].bytes;
      pCtx->inputType = pQuery->colList[colIndex].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pExpr->bytes;
    pCtx->outputType = pExpr->type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pFuncMsg->functionId;

    // copy the function parameters; string parameters are referenced through argValue.pz,
    // all others are read from the i64 storage of the argument
    pCtx->numOfParams = pFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t argType = pFuncMsg->arg[j].argType;
      int16_t argBytes = pFuncMsg->arg[j].argBytes;

      bool  isString = (argType == TSDB_DATA_TYPE_BINARY || argType == TSDB_DATA_TYPE_NCHAR);
      char *pArgData = isString ? pFuncMsg->arg[j].argValue.pz : (char *)&pFuncMsg->arg[j].argValue.i64;

      tVariantCreateFromBinary(&pCtx->param[j], pArgData, argBytes, argType);
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions rely on the first output column being the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[1].i64Key = pQuery->order.orderColId;

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;
    }

    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer, it starts right behind the SResultInfo array
  char *interBuf = (char *)pRuntimeEnv->resultInfo + resInfoBytes;
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, interBuf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything the runtime environment holds: the time window result info, the
 * parameters/tag variant/tag context list of every function context, the result info and
 * context arrays, the fill info, the result buffer, both query handles and the ts buffer.
 *
 * Does nothing when pRuntimeEnv->pQuery is NULL.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  qDebug("QInfo:%p teardown runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = pRuntimeEnv->pCtx + col;

      int32_t k = 0;
      while (k < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[k]);
        ++k;
      }

      tVariantDestroy(&pFuncCtx->tag);
      taosTFree(pFuncCtx->tagInfo.pTagCtxList);
    }

    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  // the destroy functions hand back the value to store in the released slot
  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1772
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1773

H
Haojun Liao 已提交
1774
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;}
H
hjxilinx 已提交
1775

H
Haojun Liao 已提交
1776 1777 1778
/*
 * Tell whether the query produces a fixed number of output rows.
 *
 *   - interval queries: never;
 *   - top/bottom queries and group-by-normal-column queries: always;
 *   - otherwise: true as soon as one output function that is neither a timestamp helper nor
 *     the leading primary-timestamp projection is not flagged as multi-output.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[col].base;

    // ignore the ts_comp function: a one-parameter projection of the primary timestamp in column 0
    bool leadingTsProjection = (col == 0 && pFunc->functionId == TSDB_FUNC_PRJ && pFunc->numOfParams == 1 &&
                                pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool skip = leadingTsProjection || pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY;
    if (skip) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1808
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1809
static bool isPointInterpoQuery(SQuery *pQuery) {
1810
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1811
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1812
    if (functionID == TSDB_FUNC_INTERP) {
1813 1814 1815
      return true;
    }
  }
1816

1817 1818 1819 1820
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1821
static bool isSumAvgRateQuery(SQuery *pQuery) {
1822
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1823
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1824 1825 1826
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1827

1828 1829 1830 1831 1832
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1833

1834 1835 1836
  return false;
}

H
hjxilinx 已提交
1837
/*
 * Returns true if at least one output column uses TSDB_FUNC_LAST_ROW.
 * Despite the name, only the last_row function id is tested here.
 */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++col;
  }

  return false;
}

H
hjxilinx 已提交
1848
/*
 * Decide whether the data has to be scanned a second time in the opposite direction.
 *
 * The output columns are inspected in order, skipping TSDB_FUNC_TS, TSDB_FUNC_TS_DUMMY and
 * TSDB_FUNC_TAG:
 *   - a first/first_dst function in a non-ascending query requires the reverse scan;
 *   - the first last/last_dst function found decides the result: a reverse scan is required
 *     iff the order stored in its first argument differs from the query order.
 * If neither case is met, no reverse scan is required.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[col].base;
    int32_t      funcId = pFunc->functionId;

    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY || funcId == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t requestedOrder = (int32_t)pFunc->arg->argValue.i64;
      return requestedOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1867

H
Haojun Liao 已提交
1868 1869 1870 1871
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1872 1873
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1874 1875 1876
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1877 1878 1879 1880

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1881
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1882 1883 1884
      return false;
    }
  }
1885

H
hjxilinx 已提交
1886 1887 1888
  return true;
}

1889 1890
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1891
/*
 * Compute the interval-aligned time window that contains 'key'.
 *
 * win->skey is 'key' truncated to the interval boundary (taosTimeTruncate). win->ekey is the
 * last timestamp of that window:
 *   - INT64_MAX when keyFirst lies so close to INT64_MAX that adding one interval would
 *     overflow; in that case the whole query range [keyFirst, keyLast] must be shorter than one
 *     interval, so no further adjustment is needed;
 *   - skey advanced by one interval through taosTimeAdd, minus 1, for the 'n' and 'y'
 *     interval units;
 *   - skey + interval - 1 otherwise.
 *
 * Requires keyFirst <= key <= keyLast and sliding <= interval (asserted).
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  bool calendarUnit = (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y');
  if (calendarUnit) {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column queries. For any other
 * query it is 1 only if there is at least one output function besides TSDB_FUNC_TS /
 * TSDB_FUNC_TS_DUMMY and all of those functions are flagged as multi-output.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[funcId].nStatus);
    if (!allMultiOutput) {
      break;  // one single-output function settles it
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1935
bool colIdCheck(SQuery *pQuery) {
1936 1937
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1938
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1939
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1940 1941 1942
      return false;
    }
  }
1943

1944 1945 1946 1947 1948 1949
  return true;
}

/*
 * Returns true if every output function, apart from the timestamp/tag helpers
 * (TSDB_FUNC_TS, TSDB_FUNC_TS_DUMMY, TSDB_FUNC_TAG, TSDB_FUNC_TAG_DUMMY), is either
 * 'functId' or 'functIdDst'.
 *
 * todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
 * the scan order is not matter
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool isHelper = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY || funcId == TSDB_FUNC_TAG ||
                     funcId == TSDB_FUNC_TAG_DUMMY);
    bool isWanted = (funcId == functId || funcId == functIdDst);

    if (!isHelper && !isWanted) {
      return false;
    }
  }

  return true;
}

/* True if, besides timestamp/tag helpers, the query only uses first/first_dst. */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* True if, besides timestamp/tag helpers, the query only uses last/last_dst. */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1970
// todo refactor, add iterator
1971 1972
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1973
  for(int32_t i = 0; i < t; ++i) {
1974
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1975 1976 1977

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1978
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1979

1980 1981 1982 1983
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1984 1985 1986 1987
    }
  }
}

1988
/*
 * Helper of changeExecuteScanOrder: log the scan order change of a 'queryType' query, swap the
 * two ends of the query time window and, if requested, update the per-table last keys through
 * doExchangeTimeWindow(). The query order itself is NOT modified here.
 */
static void exchangeQueryRangeForOrder(SQInfo *pQInfo, const char *queryType, int32_t newOrder, bool updateLastKey) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  qDebug("QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
         "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64,
         pQInfo, queryType, pQuery->order.order, newOrder, pQuery->window.skey, pQuery->window.ekey,
         pQuery->window.ekey, pQuery->window.skey);

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (updateLastKey) {
    doExchangeTimeWindow(pQInfo, &pQuery->window);
  }
}

/*
 * Adjust the scan order (and, where needed, the query time window) to what the functions in
 * the query actually require:
 *
 *   - last_row query: always ascending; the window is normalised to skey <= ekey;
 *   - descending group-by-normal-column query: switched to ascending, window normalised and the
 *     per-table last keys updated;
 *   - point interpolation query without interval: always ascending;
 *   - query using only first/first_dst: ascending; only last/last_dst: descending. This rule is
 *     applied to non-interval queries and to interval queries on a super table (stableQuery);
 *     interval queries on a normal table keep their order.
 *
 * pQueryMsg is currently not used.
 *
 * todo handle the case the the order irrelevant query type mixed up with order critical query type
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      // the per-table last keys are left untouched for the interp query
      exchangeQueryRangeForOrder(pQInfo, "interp", TSDB_ORDER_ASC, false);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  // interval query on a normal table: nothing to adjust
  bool intervalQuery = (pQuery->interval.interval != 0);
  if (intervalQuery && !stableQuery) {
    return;
  }

  if (onlyFirstQuery(pQuery)) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      exchangeQueryRangeForOrder(pQInfo, intervalQuery ? "only-first stable" : "only-first", TSDB_ORDER_ASC, true);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
  } else if (onlyLastQuery(pQuery)) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      exchangeQueryRangeForOrder(pQInfo, intervalQuery ? "only-last stable" : "only-last", TSDB_ORDER_DESC, true);
    }

    pQuery->order.order = TSDB_ORDER_DESC;
  }
}

/*
 * Initial number of result pages for a query:
 *   - 128 for a group-by-normal-column query;
 *   - for a time window (interval) query one page per table, but at least 16;
 *   - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t numOfPages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  } else {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(numOfPages > 0);
  return numOfPages;
}

2098 2099
/*
 * Compute the geometry of the intermediate result buffer.
 *
 * Outputs:
 *   *rowsize - bytes of one result row: pQuery->rowSize scaled by
 *              GET_ROW_PARAM_FOR_MULTIOUTPUT();
 *   *ps      - page size: DEFAULT_INTERN_BUF_PAGE_SIZE, doubled until a page (minus the
 *              tFilePage header) can hold at least MIN_ROWS_PER_PAGE (4) rows;
 *   pRuntimeEnv->numOfRowsPerPage - rows that fit into one such page; asserted not to exceed
 *              MAX_ROWS_PER_RESBUF_PAGE.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t MIN_ROWS_PER_PAGE = 4;
  int32_t pageHeaderBytes = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // grow the page until the minimum number of rows fits behind the page header
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * MIN_ROWS_PER_PAGE) > (*ps) - pageHeaderBytes;) {
    *ps = (*ps << 1u);
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2115
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2116

H
Haojun Liao 已提交
2117 2118 2119 2120
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
2121 2122 2123 2124 2125
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
2126

H
Haojun Liao 已提交
2127 2128 2129 2130 2131 2132 2133 2134
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

2135
    // no statistics data, load the true data block
H
Haojun Liao 已提交
2136
    if (index == -1) {
H
Haojun Liao 已提交
2137
      return true;
2138
    }
2139

2140
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
2141
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
H
Haojun Liao 已提交
2142
      return true;
2143
    }
2144

2145
    // all data in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
2146
    if (pDataStatis[index].numOfNull == numOfRows) {
2147 2148 2149 2150 2151 2152 2153 2154 2155

      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
        if (pFilterElem->fp == isNull_filter) {
          return true;
        }
      }

2156 2157
      continue;
    }
2158

H
Haojun Liao 已提交
2159 2160 2161
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
S
TD-1057  
Shengliang Guan 已提交
2162 2163
      float minval = (float)(*(double *)(&pDataBlockst->min));
      float maxval = (float)(*(double *)(&pDataBlockst->max));
2164

2165 2166 2167 2168 2169 2170 2171
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
2172
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
2173 2174 2175 2176 2177
          return true;
        }
      }
    }
  }
2178

H
Haojun Liao 已提交
2179 2180 2181 2182 2183 2184 2185 2186
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
2187

H
Haojun Liao 已提交
2188
  return false;
2189 2190
}

H
Haojun Liao 已提交
2191 2192 2193 2194 2195 2196 2197 2198
/*
 * Check whether a boundary of an interval time window falls into the key range of the data
 * block, i.e. whether the block is NOT completely contained in a single time window.
 *
 * Ascending scan: start with the aligned window of the block's first key. If that window ends
 * before the block does, the block spans several windows. Otherwise the following windows
 * (GET_NEXT_TIMEWINDOW) are inspected until one starts behind the block; a window starting
 * inside (block.skey, block.ekey] also means overlap.
 * Descending scan: the mirrored procedure, starting from the block's last key.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow curWin = {0};

  TSKEY queryStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY queryEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, queryStart, queryEnd, &curWin);
    assert(curWin.ekey >= pBlockInfo->window.skey);

    if (curWin.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &curWin);
      if (curWin.skey > pBlockInfo->window.ekey) {
        return false;
      }

      assert(curWin.ekey > pBlockInfo->window.ekey);
      if (curWin.skey <= pBlockInfo->window.ekey && curWin.skey > pBlockInfo->window.skey) {
        return true;
      }
    }
  }

  // descending scan
  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, queryStart, queryEnd, &curWin);
  assert(curWin.skey <= pBlockInfo->window.ekey);

  if (curWin.skey > pBlockInfo->window.skey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &curWin);
    if (curWin.ekey < pBlockInfo->window.skey) {
      return false;
    }

    assert(curWin.skey < pBlockInfo->window.skey);
    if (curWin.ekey < pBlockInfo->window.ekey && curWin.ekey >= pBlockInfo->window.skey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2240
/*
 * Decide how much of the current data block is needed and load exactly that.
 *
 * Outputs:
 *   *status     - BLK_DATA_NO_NEEDED, BLK_DATA_STATIS_NEEDED, BLK_DATA_ALL_NEEDED or
 *                 BLK_DATA_DISCARD (block rejected by the statistics based pre-filter);
 *   *pStatis    - block statistics, when they were retrieved;
 *   *pDataBlock - block data, when it was retrieved.
 *
 * The complete block is required if the query has column filters or a ts buffer, or if it is an
 * interval query and the block is not contained in one time window. Otherwise the data
 * requirement callbacks (dataReqFunc) of the output functions decide.
 *
 * Returns TSDB_CODE_SUCCESS, or terrno when the block data could not be retrieved.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirements of all output functions, stop once everything is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // report a failed retrieval the same way as in the BLK_DATA_ALL_NEEDED branch below
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    // NOTE(review): the block data is still retrieved (and counted as loaded) after the status
    // was set to BLK_DATA_DISCARD above -- confirm whether callers rely on that
    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2325
/*
 * Binary search for 'key' in a list of 'num' timestamps (pValue is read as a TSKEY array,
 * assumed to be sorted in ascending order).
 *
 *   order == TSDB_ORDER_DESC: returns the index of the last element that is <= key,
 *                             or -1 if every element is larger than key;
 *   order == TSDB_ORDER_ASC : returns the index of the first element that is >= key,
 *                             or -1 if every element is smaller than key.
 *
 * Returns -1 for num <= 0. Any other 'order' value trips the assertion.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * tsList = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    for (;;) {
      if (key >= tsList[hi]) return hi;
      if (key == tsList[lo]) return lo;
      if (key < tsList[lo]) return lo - 1;

      // here tsList[lo] < key < tsList[hi]
      int32_t mid = lo + ((hi - lo + 1) >> 1);

      if (key < tsList[mid]) {
        hi = mid - 1;
      } else if (key > tsList[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  for (;;) {
    if (key <= tsList[lo]) return lo;
    if (key == tsList[hi]) return hi;

    if (key > tsList[hi]) {
      // the next element, if there is one, is the first one behind the key
      hi = hi + 1;
      return (hi >= num) ? -1 : hi;
    }

    // here tsList[lo] < key < tsList[hi]
    int32_t mid = lo + ((hi - lo + 1) >> 1);

    if (key < tsList[mid]) {
      hi = mid - 1;
    } else if (key > tsList[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400
/*
 * Grow every output column buffer of the query so that it can hold `capacity`
 * rows, and point each function context at the start of its (possibly moved)
 * buffer.
 *
 * Nothing is done when `capacity` is smaller than the current capacity.
 * If realloc fails the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // do the multiplication in size_t: bytes * capacity may not fit into a 32-bit signed int
    size_t newLen = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newLen);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2416 2417 2418
/*
 * In case of prj/diff style queries, make sure the output buffers can take all
 * rows of the data block described by pBlockInfo in addition to the rows that
 * are already buffered.
 *
 * Interval queries, group-by-normal-column queries, fixed-output queries and
 * ts-comp queries are left untouched. For the remaining queries, when the free
 * room (capacity - rows) is smaller than the block, every column buffer is
 * grown by exactly the missing number of rows, the newly usable tail is zeroed
 * and each function context is re-pointed behind the rows already produced.
 *
 * If realloc fails the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // do the multiplication in size_t: bytes * newSize may not fit into a 32-bit signed int
    size_t newLen = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newLen);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    } else {
      // clear everything behind the rows that already hold results
      memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // top/bottom/diff write their timestamps into the output of the first column,
    // which has already been re-pointed at this stage (i == 0 is handled first)
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475
/*
 * Initialise the first time window of an interval query from the given block.
 * Only acts while windowResInfo.prevSKey still holds TSKEY_INITIAL_VAL, i.e.
 * the window has not been set before; non-interval queries are ignored.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    // ascending scan: align on the first timestamp of the block
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
    pInfo->startTime = win.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);
    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = win.skey;
}

2476 2477
/*
 * Iterate over the data blocks delivered by the query handle of the current
 * scan (primary handle for the master scan, secondary handle otherwise) and
 * apply the query functions to every block that is not discarded.
 *
 * The loop ends when the handle has no more blocks, when loading a block
 * fails, or when the query status reports a full result buffer / completion.
 * A killed query, or a non-success terrno after the loop, leaves the function
 * through longjmp(pRuntimeEnv->env, ...).
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo * pCost = &pRuntimeEnv->summary;
  STableQueryInfo *pCurrent = pQuery->current;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT handle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  } else {
    handle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray *     pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, handle, &blockInfo, &pStatis,
                                         &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped as a whole: move lastKey one step past its far edge
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv))) {
    return 0;
  }

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
    pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
  } else {
    assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2556
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2557
  tVariantDestroy(tag);
2558

2559
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2560
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2561
    assert(val != NULL);
2562

H
[td-90]  
Haojun Liao 已提交
2563
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2564
  } else {
2565
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2566 2567 2568 2569
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2570

H
hjxilinx 已提交
2571
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2572
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2573 2574 2575 2576
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2577
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2578
    } else {
H
Haojun Liao 已提交
2579 2580 2581 2582 2583
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2584
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2585
    }
2586
  }
2587 2588
}

2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600
/*
 * Linear lookup of a tag column description by column id.
 * Returns a pointer into pTagColList, or NULL when no entry has the given id.
 * The list must be non-NULL and non-empty (asserted).
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo *pEnd = pTagColList + numOfTags;
  for (SColumnInfo *pCol = pTagColList; pCol != pEnd; ++pCol) {
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2601
/*
 * Load the tag values of pTable into the function contexts of the query.
 *
 *  - A query whose single output is TSDB_FUNC_TS_COMP only needs the tag named
 *    by the first parameter of that expression; it is stored in pCtx[0].tag.
 *  - Otherwise every output expression that refers to a tag column gets its
 *    tag value, and, if the first expression is a ts/prj function on the
 *    primary timestamp column while a TS buffer exists (join), the join tag
 *    named by its parameter is stored in pCtx[0].tag and logged.
 *
 * NOTE(review): doGetTagColumnInfoById() returns NULL when the column id is
 * not part of pQuery->tagColList; the result is dereferenced unchecked here.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
  } else {
    // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];

      // ts_comp column required the tag value for join filter
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        continue;
      }

      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }

    // set the join tag for first column
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
        pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      assert(pFuncMsg->numOfParams == 1);

      int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
      SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

      doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);

      // Log the join tag with the conversion that matches its type. Only string-typed
      // tags may be printed through pz with %s; all other tags are printed as i64Key.
      // Both logs belong to this branch: outside of it no join tag has been set.
      tVariant *pTag = &pRuntimeEnv->pCtx[0].tag;
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo, pExprInfo->base.arg->argValue.i64,
               pTag->pz);
      } else {
        qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
               pTag->i64Key);
      }
    }
  }
}

/*
 * Feed one window result into the merge functions of all output columns.
 *
 * timestamp  : start timestamp handed to every function context
 * pWindowRes : window result whose page supplies the input data
 * mergeFlag  : true  - merge into the current output row;
 *              false - advance every context to the next output row and
 *                      re-initialise it before merging
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  // first pass: prepare the input of every column
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pc = &pCtxList[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf = pc->aOutputBuf + pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[functionId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);

    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pc->tag);

    int32_t type = pc->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // second pass: run the merge function of every column but the dummy tags
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtxList[i]);
    }
  }
}

2693
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2762
/*
 * Debug helper: dump the intermediate result of a super table query to stdout,
 * one line per row. pdata holds one page per output column; the cell of row j
 * in column i is read at pdata[i]->data + bytes(i) * j.
 * Columns whose type is not handled below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      char *     cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, cell, type);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }
    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2797 2798 2799
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2800 2801 2802 2803 2804
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2805

2806 2807
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2808

2809 2810
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2811

2812 2813 2814 2815
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2816

2817 2818 2819 2820
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2821

H
hjxilinx 已提交
2822
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2823
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2824
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2825

H
Haojun Liao 已提交
2826
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2827
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2828

H
hjxilinx 已提交
2829
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2830
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2831
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2832

H
Haojun Liao 已提交
2833
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2834
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2835

2836 2837 2838
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2839

2840 2841 2842
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2843
/*
 * Merge the results of the table groups, starting at pQInfo->groupIndex, until
 * one group produces at least one result or all groups have been visited.
 * pQInfo->groupIndex is advanced past every group that was processed.
 *
 * Returns -1 when merging a group reports a negative value (the results could
 * not be saved), TSDB_CODE_SUCCESS otherwise. The time spent is added to the
 * firstStageMergeTime counter of the query summary.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t start = taosGetTimestampUs();
  int32_t code = TSDB_CODE_SUCCESS;

  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    code = mergeIntoGroupResultImpl(pQInfo, group);
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (code > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  // every group visited and every page consumed: the super table query is finished
  SGroupResInfo* pGroupRes = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pGroupRes->pos.pageId == pGroupRes->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - start;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the buffered results of the current group from the result pages into
 * the output buffers of the query (pQuery->sdata), at most rec.capacity rows.
 *
 * When all pages of the current group have already been returned, the next
 * group that has results is merged first; if no group is left the query is
 * marked as over and nothing is copied.
 *
 * The read position (pos.pageId / pos.rowId) is kept in pQInfo->groupResInfo,
 * so a page that does not fit completely is continued by the next call.
 * pQuery->rec.rows must be 0 on entry and holds the number of copied rows on
 * return.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pos.pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->pos.rowId < pData->num);

    // first row of this page that has not been returned yet; it is greater than 0
    // when a previous call stopped in the middle of the page
    int32_t startRow = pGroupResInfo->pos.rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);
    int32_t numOfCopiedRows = 0;

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer gets full: take what fits and remember where to continue
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->pos.rowId += numOfCopiedRows;
      done = true;
    } else {
      // the rest of the page fits: copy only the rows that are still pending
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pos.pageId += 1;
      pGroupResInfo->pos.rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page; skip the rows already returned
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + (size_t)startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2951
/*
 * Number of result rows held by a window result.
 *
 * The value is taken from the first output column that is not a ts, tag or
 * tagprj function and that reports numOfRes > 0. Returns 0 when no such column
 * exists.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool auxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    if (auxiliary) {
      continue;
    }

    SResultInfo *pInfo = &pWindowRes->resultInfo[col];
    assert(pInfo != NULL);

    if (pInfo->numOfRes > 0) {
      return pInfo->numOfRes;
    }
  }

  return 0;
}

2974
/*
 * Merge the window results of all tables of one group, ordered by window start
 * timestamp, and flush the merged rows into the result buffer pages of the
 * group.
 *
 *  - No table of the group has results: returns 0.
 *  - Exactly one table has results: nothing is merged, the pages of that table
 *    are used directly and their number is returned.
 *  - Otherwise a loser tree over the tables yields the window results in
 *    timestamp order; results with equal timestamps are merged into one row.
 *    Returns the number of data pages written for the group.
 *
 * Returns -1 when flushing to the result buffer fails. On memory exhaustion or
 * when the query has been killed the function does not return: it longjmp()s
 * to pRuntimeEnv->env. All temporary allocations are released on every exit
 * path.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    taosTFree(posList);
    taosTFree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that own result pages and at least one window result
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    taosTFree(posList);
    taosTFree(pTableList);
    return 0;
  }

  if (numOfTables == 1) {  // no need to merge results since only one table in each group
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  // everything below leaves through _cleanup, which releases these resources
  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;
  char *          buf = NULL;

  int32_t jmpCode = TSDB_CODE_SUCCESS;  // error code to longjmp with after cleanup, if any
  int32_t ret = -1;                     // return value when no longjmp is pending

  int64_t lastTimestamp = -1;
  int64_t startt = 0;
  int64_t endt = 0;

  SCompSupporter cs = {pTableList, posList, pQInfo};

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {  // the tree is dereferenced below, do not continue without it
    jmpCode = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pResultInfo == NULL) {
    jmpCode = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

  buf = calloc(1, pRuntimeEnv->interBufSize);
  if (buf == NULL) {
    jmpCode = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      jmpCode = TSDB_CODE_TSC_QUERY_CANCELLED;
      goto _cleanup;
    }

    // the winner of the loser tree is the source with the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window result, just step over it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            goto _cleanup;  // ret is still -1
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto _cleanup;  // ret is still -1
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  ret = pQInfo->groupResInfo.numOfDataPages;

_cleanup:
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);
  taosTFree(pResultInfo);
  taosTFree(buf);

  if (jmpCode != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, jmpCode);
  }

  return ret;
}

H
Haojun Liao 已提交
3158 3159
/**
 * Copy the rows currently held in pQuery->sdata into newly obtained pages of the
 * disk-based result buffer. The row count is taken from pQuery->sdata[0]->num and
 * the rows are written in batches of at most pResultBuf->numOfRowsPerPage rows,
 * one page per batch.
 *
 * @param pRuntimeEnv    runtime environment supplying the query, the function contexts and the result buffer
 * @param pGroupResInfo  group the pages are requested for (groupId); numOfDataPages grows by one per page written
 * @return TSDB_CODE_SUCCESS; no other value is returned by this function
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  const int32_t rowsPerPage = pResultBuf->numOfRowsPerPage;
  int32_t       pageId = -1;  // filled in by getNewDataBuf() for every page it hands out

  int32_t pending = (int32_t) pQuery->sdata[0]->num;  // rows still to be written
  int32_t done = 0;                                   // rows already written

  while (pending > 0) {
    int32_t chunk = (pending <= rowsPerPage)? pending : rowsPerPage;
    assert(chunk > 0);

    // fetch a new output page for this batch
    tFilePage *pPage = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    pPage->num = chunk;

    // copy the batch column by column into the page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *from = ((char *) pQuery->sdata[col]->data) + done * width;
      char *to = pPage->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;
      memcpy(to, from, (size_t)(pPage->num * width));
    }

    pending -= chunk;
    done += chunk;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/**
 * Reset the merge output state of every output column: the column buffer is marked
 * empty and the matching function context is pointed at its result info.
 *
 * @param pQuery       query whose sdata buffers are emptied (num = 0)
 * @param pCtx         array of function contexts, one per output column
 * @param pResultInfo  array of result infos, one per output column
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = pCtx + col;

    pQuery->sdata[col]->num = 0;

    pColCtx->resultInfo = pResultInfo + col;
    pColCtx->startOffset = 0;
    pColCtx->size = 1;

    // the output cursor is placed one element (outputBytes) before the start of the column data
    pColCtx->aOutputBuf = pQuery->sdata[col]->data - pColCtx->outputBytes;
  }
}

3207 3208 3209 3210
/**
 * Turn the per-table query state around for the reverse scan: the window end is
 * moved to lastKey + step (unless lastKey still equals win.skey), the window
 * bounds are swapped, lastKey restarts at the new skey, the cursor order is
 * switched and the current window index is set to the last window result.
 *
 * @param pQuery           query whose order (already switched by the caller) yields the step
 * @param pTableQueryInfo  per-table state to update; NULL is accepted and ignored
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // TODO validate: relation between win.ekey and lastKey + step for asc/desc queries

  // lastKey == win.skey means no results were produced, the window is left untouched then
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    // order has changed already
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // continue from the last time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/**
 * Update the `complete` flag of every function result in every closed window result:
 *   - first/first_dst with ascending order, or last/last_dst with descending order,
 *     are flagged as not complete;
 *   - every other function except ts and tag is flagged as complete;
 *   - ts and tag are left untouched.
 *
 * @param pQInfo          query info providing the select expressions
 * @param pWindowResInfo  window results to walk
 * @param order           order value compared against TSDB_ORDER_ASC / TSDB_ORDER_DESC
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!getTimeWindowResStatus(pWindowResInfo, w)) {
      continue;  // only closed windows are handled
    }

    SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pWinRes->resultInfo[col].complete = false;
      } else if (!(functId == TSDB_FUNC_TS || functId == TSDB_FUNC_TAG)) {
        pWinRes->resultInfo[col].complete = true;
      }
    }
  }
}

3263 3264
/**
 * Update the `complete` flag of the function results before a reverse scan.
 *
 * For group-by-normal-column and interval queries the work is delegated to
 * disableFuncInReverseScanImpl() on the runtime window results. Otherwise the
 * result info attached to each function context is updated directly:
 * first/first_dst (ascending order) and last/last_dst (descending order) become
 * not complete, every other function except ts/tag becomes complete.
 *
 * @param pQInfo  query info owning the runtime environment
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple result of a table query: one result info per function context
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
    int32_t functId = pQuery->pSelectExpr[col].base.functionId;

    SResultInfo *pResInfo = pRuntimeEnv->pCtx[col].resultInfo;
    if (pResInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pResInfo->complete = false;
    } else if (!(functId == TSDB_FUNC_TS || functId == TSDB_FUNC_TAG)) {
      pResInfo->complete = true;
    }
  }
}

/**
 * For every table of every table group, reverse the per-table query range and
 * copy the resulting lastKey into the matching entry of
 * pQInfo->tableGroupInfo.pGroupList.
 *
 * @param pQInfo  query info holding the table groups and the table key groups
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeys = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pTables);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(pTables, k);
      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // keep the last key in the tableKeyInfo list in sync; per the original note that list is
      // used to build the tsdbQueryHandle and to decide its start check timestamp
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeys, k);
      pKeyInfo->lastKey = pCheckInfo->lastKey;

      assert(pCheckInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3314
/**
 * Apply SWITCH_ORDER to the order field of every output function context.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts and the query
 */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  for (int32_t left = numOfOutput; left > 0; --left, ++pCtx) {
    SWITCH_ORDER(pCtx->order);
  }
}

H
Haojun Liao 已提交
3321
/**
 * Allocate the result-info array of a result row together with its intermediate
 * buffer in one zeroed allocation, and initialise the row position to {-1, -1}.
 *
 * Layout of the allocation: numOfOutput SResultInfo entries followed by
 * interBufSize bytes, which are handed to setWindowResultInfo().
 *
 * @param pQuery         query providing the number of output columns
 * @param pResultRow     row whose resultInfo and pos are set
 * @param isSTableQuery  forwarded unchanged to setWindowResultInfo()
 * @param interBufSize   size in bytes of the trailing intermediate buffer
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if the allocation fails
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer starts right behind the result-info array
  char *interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/**
 * Point every function context back at the start of its output column in
 * pQuery->sdata, reset and re-attach the runtime result info, zero the column
 * buffer, and finally run initCtxOutputBuf().
 *
 * @param pRuntimeEnv  runtime environment holding the contexts, result infos and the query
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx + col;
    SResultInfo    *pResInfo = &pRuntimeEnv->resultInfo[col];

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * attach the (reset) result information and intermediate buffer;
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(pResInfo);
    pCtx->resultInfo = pResInfo;

    // top/bottom/diff write their timestamps through the output buffer of the first column
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/**
 * Advance the output positions of the function contexts by `output` rows and
 * reset their result infos.
 *
 * The diff function must not appear here (asserted).
 *
 * @param pRuntimeEnv  runtime environment holding the contexts and the query
 * @param output       number of rows to move forward
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // move the value output position for functions flagged as outer-forward
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom queries the first output column (timestamp) is filled by the
     * top/bottom routine itself through ptsOutputBuf, so that buffer is forwarded as well.
     * The diff function is handled in the multi-output function.
     */
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/**
 * Set currentStage of every function context to 0 and invoke the function's
 * init callback for contexts whose result info is not initialized yet.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts and the query
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3409
/**
 * Apply the pending limit offset to the rows currently in the output buffers.
 *
 * - Nothing happens when there are no rows or the offset is 0.
 * - If all rows fall inside the offset they are dropped: the offset shrinks by the
 *   row count, the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 * - Otherwise the first `offset` rows are removed by moving the remaining rows to
 *   the front of each column, the output cursors are placed behind them and the
 *   result count is updated.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts and the query
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset == 0 || pQuery->rec.rows == 0) {
    return;
  }

  // the whole batch is skipped
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only the leading part of the batch is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->limit.offset = 0;
  pQuery->rec.rows -= numOfSkip;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    int32_t bytes = pCtx->outputBytes;
    char   *base = (char*) pQuery->sdata[col]->data;

    // regions may overlap, hence memmove
    memmove(pQuery->sdata[col]->data, base + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pCtx->aOutputBuf = base + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/**
 * Update the status of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status; any other value is OR-ed into
 * the status after the QUERY_NOT_COMPLETED flag has been cleared.
 *
 * @param pQuery  query to update
 * @param status  status value to set or add
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status == QUERY_NOT_COMPLETED) {
    pQuery->status = status;
    return;
  }

  // QUERY_NOT_COMPLETED is not compatible with any other status, so drop it before adding the new one
  CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
  pQuery->status |= status;
}

/**
 * Run the xNextStep callback of every function except ts and report whether any
 * of them is still not complete afterwards.
 *
 * For group-by-normal-column and interval queries this is done for every closed
 * window result (after binding the contexts to that result); otherwise it is done
 * once on the current contexts.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts and the query
 * @return true if at least one function result is not complete
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    again = false;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pSelectExpr[col].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (!pResInfo->complete) {
        again = true;
      }
    }

    return again;
  }

  // one pass per closed group/window result
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (!pResult->closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pResult);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pSelectExpr[col].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (!pResInfo->complete) {
        again = true;
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3505
/**
 * Take a snapshot of the current scan state: query status, current window index,
 * the query window (w) and the current table's window (curWindow) with its skey
 * replaced by `start`; lastKey is set to `start` as well.
 *
 * `start` must not be beyond the table's lastKey in scan direction (asserted).
 *
 * @param pRuntimeEnv  runtime environment holding the query
 * @param start        start key of the scan
 * @return the snapshot, remaining members zero-initialised
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pTableQueryInfo->lastKey) : (start >= pTableQueryInfo->lastKey));

  SQueryStatusInfo snapshot = {0};
  snapshot.status = pQuery->status;
  snapshot.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  snapshot.lastKey = start;

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pTableQueryInfo->win);

  snapshot.curWindow.skey = start;

  return snapshot;
}

3526 3527 3528 3529
/**
 * Prepare the runtime environment for the reverse scan.
 *
 * Steps, in this order: save the ts-buffer cursor into pStatus->cur (and switch the
 * ts-buffer order if a buffer exists), install the swapped pStatus->curWindow as the
 * query window, switch the query order, mark the scan as reverse, switch the context
 * order, update the function completion flags and the per-table ranges, then replace
 * the secondary tsdb query handle with a new one.
 *
 * Does not return if the new handle cannot be created: control is transferred with
 * longjmp(pRuntimeEnv->env, terrno).
 *
 * @param pRuntimeEnv  runtime environment to prepare
 * @param pStatus      scan snapshot; provides curWindow and receives the saved cursor
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // remember the cursor so that it can be restored after the reverse scan
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // the time range is scanned in the opposite direction
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {0};
  cond.order = pQuery->order.order;
  cond.colList = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // release the previous secondary handle before a new one is created
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3575 3576
/**
 * Undo the changes made for the reverse scan: switch the query and context order
 * back, restore the ts-buffer cursor, mark the scan as master scan again and restore
 * lastKey, status and the time windows from the snapshot.
 *
 * @param pRuntimeEnv  runtime environment to restore
 * @param pStatus      snapshot holding the saved cursor, lastKey, status and window (w)
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  // back to the original scan direction
  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the state captured before the scan
  pQuery->status = pStatus->status;
  pTableQueryInfo->lastKey = pStatus->lastKey;

  // both the table window and the query window go back to the saved query window
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

H
Haojun Liao 已提交
3597 3598 3599 3600 3601 3602 3603
/**
 * Set the lastKey of the first table key entry of the first group to the start
 * key of the given query condition. Exactly one table is expected (asserted).
 *
 * @param pTableGroupInfo  table group info holding the group list
 * @param pCond            query condition whose twindow.skey is used
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstKey = taosArrayGet(pFirstGroup, 0);

  pFirstKey->lastKey = pCond->twindow.skey;
}

3604
/**
 * Scan the data blocks of the current table, starting at `start`.
 *
 * The blocks are scanned once (master scan) and then again (repeat scan, using a
 * new secondary tsdb query handle over the window covered by the master scan) for
 * as long as needScanDataBlocksAgain() reports an incomplete function. Afterwards,
 * if needReverseScan() says so, one reverse scan is executed and the environment is
 * restored.
 *
 * Does not return (longjmp on pRuntimeEnv->env) when a secondary query handle
 * cannot be created (code: terrno) or when the query was killed
 * (code: TSDB_CODE_TSC_QUERY_CANCELLED, after finalizeQueryResult()).
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key of the scan
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the start position, used for the repeat and reverse scans
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  const int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;

      // the window end only moves if the scan advanced lastKey, i.e. qualified data was found
      if (pTableQueryInfo->lastKey != qstatus.lastKey) {
        qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and leave the loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    STsdbQueryCond cond = {0};
    cond.order = pQuery->order.order;
    cond.colList = pQuery->colList;
    cond.numOfCols = pQuery->numOfCols;

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // stop right away if the query has been killed
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3685
/**
 * Invoke the xFinalize callback of every output function.
 *
 * For group-by-normal-column and interval queries this is done once per closed
 * window result, and the number of result rows is stored in that window result;
 * for group-by-normal-column queries all time windows are closed first.
 * For every other query the callbacks run once on the current contexts.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts and the query
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pWinRes = &pWindowResInfo->pResult[w];
    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * record the number of output rows of this window/group; per the original note this is
     * usually 1, except for the top and bottom query
     */
    pWinRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/**
 * Tell whether the query has at least one output column whose function is
 * something other than ts, tag or tagprj.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true if such a column exists, false otherwise
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3733
/**
 * Initialise a per-table query info object inside caller-provided memory.
 *
 * The window, lastKey (= win.skey), table handle and cursor vgroup index (-1) are
 * set. For interval and group-by-normal-column queries the window result info is
 * initialised as well; for other queries it is left untouched.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table handle stored in the object
 * @param win          time window of the table
 * @param buf          memory used as the STableQueryInfo object; it is not allocated here
 * @return `buf` as STableQueryInfo, or NULL if initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // only interval/groupby queries need the window result info
  bool needWindowRes = QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol;
  if (!needWindowRes) {
    return pInfo;
  }

  int32_t initialSize = 16;
  int32_t initialThreshold = 100;
  if (initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT) !=
      TSDB_CODE_SUCCESS) {
    return NULL;
  }

  return pInfo;
}

H
Haojun Liao 已提交
3758
/**
 * Clean up the window result info of a per-table query info object.
 * The object itself is not freed here.
 *
 * @param pTableQueryInfo  object to clean up; NULL is accepted and ignored
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3769
 * @param pDataBlockInfo
3770
 */
H
Haojun Liao 已提交
3771
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3772
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3773 3774 3775
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3776 3777
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3778 3779 3780 3781

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3782

H
Haojun Liao 已提交
3783 3784 3785
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3786

3787 3788
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3789 3790 3791
  if (pWindowRes == NULL) {
    return;
  }
3792

3793 3794 3795 3796 3797
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3798
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3799 3800 3801 3802
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3803

H
Haojun Liao 已提交
3804 3805
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3806 3807 3808 3809
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3810
/**
 * Bind every function context to the given window result: the output buffer is
 * set to the column position inside the result's buffer page, the result info is
 * attached, and the super-table-query flag is copied from the runtime environment.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts, the query and the result buffer
 * @param pResult      window result to bind to
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // page of the result buffer that stores this window result
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx + col;
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    // top/bottom/diff write their timestamps through the output buffer of the first column
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * attach the result information and intermediate buffer;
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3837 3838
/**
 * Bind every function context to the given window result and initialise the
 * contexts that need it.
 *
 * The result info is always attached. Contexts whose result is both initialized and
 * complete are otherwise left as they are; for the others the output buffer,
 * currentStage (0), timestamp buffer (top/bottom/diff) and super-table-query flag
 * are set, and the function's init callback runs if the result is not initialized.
 *
 * @param pRuntimeEnv  runtime environment holding the contexts, the query and the result buffer
 * @param pResult      window result to bind to
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // page of the result buffer that stores this window result
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx + col;

    pCtx->resultInfo = &pResult->resultInfo[col];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
    pCtx->currentStage = 0;

    int32_t functionId = pCtx->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // set super table query flag
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3871
/**
 * Set the tag values for the given table and, if a ts buffer exists, position its
 * cursor for this table.
 *
 * When the table has no saved cursor yet (cur.vgroupIndex == -1) the start position
 * for the table's tag and pQInfo->vgId is looked up in the ts buffer and the
 * resulting cursor is stored in pTableQueryInfo->cur; otherwise the saved cursor is
 * put back into the ts buffer.
 *
 * NOTE(review): the lookup-failure path returns `false`, which is 0 — the same value
 * as the success return — so callers cannot tell the two apart through the return
 * value. Kept as is; confirm the intended error code before changing it.
 *
 * @param pQInfo           query info owning the runtime environment
 * @param pTable           table handle passed to setTagVal()
 * @param pTableQueryInfo  per-table state holding the tag and the ts-buffer cursor
 * @return 0 in every case (see the note above)
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // without a ts buffer there is no cursor to position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  // both the master and supplement scan needs to set the correct ts comp start position
  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  } else {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (elem.vnode < 0) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return false;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // both paths report the cursor position that is now in effect
  if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3927
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3928 3929
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3930
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3931

3932 3933 3934
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3935
    pTableQueryInfo->win.skey = key;
3936
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3937

3938 3939 3940 3941 3942
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3943

3944 3945 3946 3947 3948 3949
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3950
    STimeWindow     w = TSWINDOW_INITIALIZER;
3951
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3952

H
Haojun Liao 已提交
3953 3954
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3955
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3956
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3957

3958 3959
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3960
        assert(win.ekey == pQuery->window.ekey);
3961
      }
3962

3963
      pWindowResInfo->prevSKey = w.skey;
3964
    }
3965

3966
    pTableQueryInfo->queryRangeSet = 1;
3967
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3968 3969 3970 3971
  }
}

/*
 * Tell whether any output expression of the query uses a function whose status flags
 * (aAggs[...].nStatus) contain TSDB_FUNCSTATE_NEED_TS.
 *
 * @param pQuery  query descriptor
 * @return        true if at least one output function is flagged as needing the timestamp
 */
bool requireTimestamp(SQuery *pQuery) {
  bool    needTs = false;
  int32_t idx = 0;

  // stop at the first function that carries the flag
  while (!needTs && idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    needTs = ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
    ++idx;
  }

  return needTs;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * The column is required when:
 * 1. the lastKey of the current table lies inside the time range of the block, or
 * 2. the end key of the query window lies inside the time range of the block, or
 * 3. one of the output functions needs the timestamp (see requireTimestamp).
 *
 * @param pQuery          query descriptor
 * @param pDataBlockInfo  description of the data block, only its time window is inspected
 * @return                true if the timestamp column must be loaded
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;
  TSKEY        lastKey = pQuery->current->lastKey;

  if (lastKey >= pBlockWin->skey && lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3995
/*
 * Copy the rows of the closed window results into the output buffers of the query (pQuery->sdata).
 *
 * Copying resumes from the position remembered in pQInfo->groupIndex (window index) and
 * pQInfo->groupResInfo.pos.rowId (row offset inside that window) and stops as soon as
 * pQuery->rec.capacity rows have been produced or all closed windows are consumed.
 *
 * @param pQInfo       query handle
 * @param pResultInfo  window result set to read from
 * @param orderType    TSDB_ORDER_ASC walks the windows forward, any other value walks them backward
 * @return             number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;
  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;

  int32_t first = 0;
  int32_t stride = -1;
  if (orderType == TSDB_ORDER_ASC) {
    first = pQInfo->groupIndex;
    stride = 1;
  } else {  // descending order: start from the tail of the closed windows
    first = numOfClosed - pQInfo->groupIndex - 1;
  }

  int32_t copied = 0;

  for (int32_t idx = first; (idx >= 0) && (idx < numOfClosed); idx += stride) {
    SWindowResult *pWin = &pWindows[idx];

    // empty window: nothing to copy, move on to the next one
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->pos.rowId = 0;
      continue;
    }

    int32_t rowOffset = pGroupResInfo->pos.rowId;
    int32_t rowsToCopy = pWin->numOfRows - rowOffset;

    if (rowsToCopy <= pQuery->rec.capacity - copied) {
      // the remaining rows of this window fit completely, the next call starts with the next window
      pGroupResInfo->pos.rowId = 0;
      pQInfo->groupIndex += 1;
    } else {
      // the output space is not enough for all rows of this window, copy only what fits and
      // remember how far we got
      rowsToCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->pos.rowId += rowsToCopy;
    }

    tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pRuntimeEnv, col, pWin, pPage);
      memcpy(dst, src + rowOffset * width, width * rowsToCopy);
    }

    copied += rowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4072
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4073
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4074

4075
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4076
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4077

4078
  pQuery->rec.rows += numOfResult;
4079

4080
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4081 4082
}

H
Haojun Liao 已提交
4083
/*
 * For non-interval queries, raise the row count of every window result to the largest number of
 * results reported by its output functions. The TS, TAG and TAGPRJ functions are not taken into
 * account. Interval queries are left untouched.
 *
 * @param pRuntimeEnv  runtime environment that owns the window results
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t w = 0; w < pRuntimeEnv->windowResInfo.size; ++w) {
    SWindowResult *pWin = &pRuntimeEnv->windowResInfo.pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      // these functions do not determine the number of rows of a window
      bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
      if (!skipped) {
        pWin->numOfRows = (uint16_t)(MAX(pWin->numOfRows, pWin->resultInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4105
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4106
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4107
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4108
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4109

4110
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4111
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4112

H
Haojun Liao 已提交
4113
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4114
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4115
  } else {
4116
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4117 4118 4119
  }
}

H
Haojun Liao 已提交
4120
/*
 * Check whether a table query still has results that were not returned to the client.
 *
 * - Once the LIMIT is reached (rec.total >= limit.limit, for a positive limit) nothing remains.
 * - For fill queries (excluding point interpolation) results remain while the fill info reports
 *   remaining rows, or, when the query status contains QUERY_COMPLETED, while filling up to the end
 *   key of the query window still yields rows.
 * - For all other queries results remain when the query status contains QUERY_COMPLETED, the query is
 *   a group-by-normal-column or interval query, and the window result set is not empty.
 *
 * @param pRuntimeEnv  runtime environment
 * @return             true if more results are pending
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // there are results waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // results not returned to the client yet have to be filled first
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * No buffered rows are left at this point. While the query is not completed, the gap between two
   * result blocks is handled after the next data block has been retrieved, so nothing is pending now.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response buffer and update the query status.
 *
 * Layout written to data:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer in network byte order;
 *   3. one STableIdInfo per entry with uid, tid and key converted to big-endian.
 *
 * The column data may end at any byte offset, so the trailer is written with memcpy instead of
 * storing through casted pointers, which would be a misaligned access.
 *
 * After the copy, a query whose status contains QUERY_COMPLETED is marked QUERY_OVER when the super
 * table query is over, or, for a table query, when no results remain.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy from every output column
 * @param data       destination buffer; the caller must provide enough space for the whole layout
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t  numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  uint32_t netNumOfTables = htonl((uint32_t)numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    // build the converted entry in an aligned local object, then copy its bytes to the buffer
    STableIdInfo dst = *pSrc;
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4197
/*
 * Generate filled result rows and apply the pending OFFSET to them.
 *
 * Rows are generated into pQuery->sdata, at most pQuery->rec.capacity per round. Generated rows are
 * dropped until pQuery->limit.offset is used up; when only a part of a round is dropped, the rows
 * that are kept are moved to the beginning of the pDst buffers.
 *
 * @param pRuntimeEnv  runtime environment
 * @param pDst         output pages, one per output column
 * @param numOfFilled  not used by this function
 * @return             number of rows available in the output buffers
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo*    pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery*    pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t numOfGen = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset left: everything generated in this round is returned
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, numOfGen);
      return numOfGen;
    }

    // the offset ends inside this round: drop the leading rows and return the rest
    if (pQuery->limit.offset < numOfGen) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, numOfGen, pQuery->limit.offset, numOfGen - pQuery->limit.offset, 0);

      numOfGen -= (int32_t)pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        char *base = pDst[col]->data;
        memmove(base, base + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                numOfGen * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return numOfGen;
    }

    // the whole round is consumed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, numOfGen, pQuery->limit.offset, 0,
           pQuery->limit.offset - numOfGen);

    pQuery->limit.offset -= numOfGen;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return 0;
    }
  }
}

4242
/*
 * Add the first stage merge time to the elapsed time of the query and write the cost summary
 * to the debug log.
 *
 * @param pQInfo  query handle
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // the merge time is part of the total elapsed time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%" PRId64 " us, first merge:%" PRId64 " us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%" PRId64 ", check rows:%" PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: internal size:%" PRId64 "B, numOfWin:%" PRId64, pQInfo, pCost->internalSupSize,
         pCost->numOfTimeWindows);
}

4258 4259
/*
 * Consume the remaining OFFSET inside the given data block.
 *
 * When the offset equals the number of rows of the block, the block is skipped entirely and lastKey
 * is moved one step past the block. Otherwise the scan position is placed on the first row after the
 * skipped ones, lastKey is set to the timestamp of that row, and the query functions are applied to
 * the block. In both cases the offset is reset to 0.
 *
 * @param pRuntimeEnv  runtime environment
 * @param pBlockInfo   description of the current data block
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the whole block is skipped
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;
    pCurrent->lastKey = edge + step;
    pQuery->limit.offset = 0;
    return;
  }

  int32_t numOfSkipped = (int32_t)pQuery->limit.offset;
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? numOfSkipped : (pBlockInfo->rows - numOfSkipped - 1);

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // column 0 is read as the array of timestamps of the block
  TSKEY *tsList = (TSKEY *) pTsCol->pData;

  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4293

4294 4295 4296 4297 4298
/*
 * Skip data blocks until the OFFSET of the query is consumed.
 *
 * Nothing is done when there is no positive offset or when column filters are present. Blocks with
 * fewer rows than the remaining offset are skipped as a whole; the block in which the offset ends is
 * handed to updateOffsetVal. The function leaves through longjmp on pRuntimeEnv->env when the query
 * is killed or when terrno is set after the scan.
 *
 * @param pRuntimeEnv  runtime environment
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT pHandle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pHandle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // the offset ends in this block: find the appropriate start position and stop skipping
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // skip the complete block and continue right after it
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY edge = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey = edge + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4333

H
Haojun Liao 已提交
4334
/*
 * Apply the OFFSET of an interval query by skipping time windows instead of rows.
 *
 * *start is first set to the lastKey of the current table. No window is skipped when there is no
 * positive offset, or when column filters, a timestamp buffer or fill info are present. Otherwise
 * data blocks are scanned and the offset is decreased by one for every time window that ends inside
 * the scanned range. When the offset reaches 0, *start, pQuery->window.skey and prevSKey are moved
 * to the first window to be returned; if that window begins in the current block, the block is
 * processed right away.
 *
 * The function leaves through longjmp on pRuntimeEnv->env when terrno is set after the scan.
 *
 * @param pRuntimeEnv  runtime environment
 * @param start        out: start timestamp of the query after skipping
 * @return             always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  *start = pCurrent->lastKey;

  // if queried with value filter, do NOT forward query start position
  bool noSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL) ||
                (pRuntimeEnv->pFillInfo != NULL);
  if (noSkip) {
    return true;
  }

  /*
   * Holes may exist between time windows, so the start position cannot be computed as
   * interval * offset; the windows have to be counted one at a time while scanning the blocks.
   */
  SWindowResInfo *pWinInfo = &pRuntimeEnv->windowResInfo;
  assert(pWinInfo->prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window of this block
    STimeWindow win = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      bool winEndsInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (win.ekey <= blockInfo.window.ekey)
                                                       : (win.ekey >= blockInfo.window.skey);
      if (winEndsInBlock) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = win.skey;
      }

      STimeWindow next = win;
      GET_NEXT_TIMEWINDOW(pQuery, &next);

      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (next.skey <= blockInfo.window.ekey)
                                                    : (next.ekey >= blockInfo.window.skey);

      if (!nextInBlock) {
        if (pQuery->limit.offset == 0) {
          // all windows are skipped and the next window lies outside this block: only move the keys
          *start = next.skey;
          pQuery->window.skey = next.skey;
          pWinInfo->prevSKey = next.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still starts from the current data block: load the block and find the
       * start position of the next queried time window. Only the primary timestamp column is used.
       * TODO: optimize, the block is only really needed when the first required window resides in it.
       */
      SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      next = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      TSKEY posKey = ((TSKEY *)pTsCol->pData)[startPos];

      if (pQuery->limit.offset == 0) {
        // reset the query start timestamp
        pCurrent->win.skey = posKey;
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pWinInfo->prevSKey = next.skey;

        // the window index is saved and restored around the processing of the block
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;
        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      // more windows have to be skipped: continue with the window found in this block
      pCurrent->lastKey = posKey;
      pWinInfo->prevSKey = next.skey;
      win = next;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4460 4461
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4462
/*
 * Create the TSDB query handle of the query and store it in pRuntimeEnv->pQueryHandle.
 *
 * No handle is created for tag-only queries, nor for super table queries that are neither interval
 * queries nor fixed-output queries. Depending on the kind of query, the handle is obtained from
 * tsdbQueryLastRow, tsdbQueryRowsInExternalWindow or tsdbQueryTables. For first/last-row queries the
 * time window of the query and of every table is replaced by the window left in the query condition
 * after the handle has been created.
 *
 * @param tsdb           TSDB repository handle
 * @param pQInfo         query handle
 * @param isSTableQuery  true for a super table query
 * @return               TSDB_CODE_SUCCESS, or the value of terrno after the handle was requested
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // plain ascending query on exactly one table: use the time window of that table
  bool useTableWindow = (!isSTableQuery) && (pQInfo->tableqinfoGroupInfo.numOfTables == 1) &&
                        (cond.order == TSDB_ORDER_ASC) && (!QUERY_IS_INTERVAL_QUERY(pQuery)) &&
                        (!isGroupbyNormalCol(pQuery->pGroupbyExpr)) && (!isFixedOutputQuery(pRuntimeEnv));
  if (useTableWindow) {
    SArray *         pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pFirstTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pFirstTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (isPointInterpoQuery(pQuery) && !isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return terrno;
  }

  if (!isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return terrno;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the new time window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, t);

      pTableInfo->win = pQuery->window;
      pTableInfo->lastKey = pTableInfo->win.skey;
    }
  }

  return terrno;
}

4526 4527 4528
/*
 * Build the fill column descriptors of a query, one per output expression.
 *
 * Each descriptor takes the width, type and function id from the output expression and the fill
 * value from pQuery->fillVal; the column offsets are the running sum of the widths of the preceding
 * columns. Every column is flagged TSDB_COL_NORMAL.
 *
 * @param pQuery  query descriptor
 * @return        array of pQuery->numOfOutput descriptors allocated with calloc, or NULL if the
 *                allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t nextOffset = 0;  // byte offset of the column inside a row
  for (int32_t col = 0; col < numOfCols; ++col) {
    SExprInfo *   pExpr = &pQuery->pSelectExpr[col];
    SFillColInfo *pInfo = &pFillCol[col];

    pInfo->col.bytes  = pExpr->bytes;
    pInfo->col.type   = (int8_t)pExpr->type;
    pInfo->col.offset = nextOffset;
    pInfo->flag       = TSDB_COL_NORMAL;  // always a normal column for table query
    pInfo->functionId = pExpr->base.functionId;
    pInfo->fillVal.i  = pQuery->fillVal[col];

    nextOffset += pExpr->bytes;
  }

  return pFillCol;
}

4551
/*
 * Initialize the runtime environment of a query.
 *
 * Steps, in order:
 *   1. derive the top/bottom and tag-output flags and the scan limitation of the query;
 *   2. create the TSDB query handle (setupQueryHandle);
 *   3. fill in the basic runtime fields and set the traverse order of the timestamp buffer, if any;
 *   4. set up the runtime environment (setupQueryRuntimeEnv);
 *   5. create the disk based result buffer and the window result info where the kind of query needs them;
 *   6. create the fill info for fill queries (excluding point interpolation);
 *   7. set the query status to QUERY_NOT_COMPLETED.
 *
 * Resources created before a failing step are not released here.
 * NOTE(review): presumably the caller destroys pQInfo on failure - confirm.
 *
 * @param pQInfo         query handle to initialize
 * @param pTsBuf         timestamp buffer, may be NULL
 * @param tsdb           TSDB repository handle
 * @param vgId           id of the vnode that executes the query
 * @param isSTableQuery  true for a super table query
 * @return               TSDB_CODE_SUCCESS, the error code of the failing step, or
 *                       TSDB_CODE_QRY_OUT_OF_MEMORY if the fill column descriptors cannot be allocated
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the timestamp buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    if (pColInfo == NULL) {
      // the descriptors could not be allocated; do not hand a NULL array to the fill module
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    // the fill starts from the aligned start of the query window
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4657
/**
 * Clear the `complete` flag of the result info attached to every output context,
 * so the functions can run again on the next table.
 *
 * @param pRuntimeEnv  runtime environment whose pCtx array (numOfOutput entries) is updated
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pInfo == NULL) {
      continue;  // this output has no result info to re-arm
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684
/**
 * Prepare the execution environment for one data block of one table.
 *
 * Non-interval queries set the execution context for the table's group, passing the
 * key one step past the block's end key. Interval queries set the interval query range
 * from the block's start key and, when tag results or a ts buffer are in use, refresh
 * the additional per-table info.
 *
 * @param pQInfo           query handle
 * @param pTableQueryInfo  per-table query state of the table owning the block
 * @param pBlockInfo       description of the block about to be processed
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
    return;
  }

  // interval query
  setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

  if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
}

H
Haojun Liao 已提交
4685
/**
 * Walk every data block returned by the active tsdb query handle and apply the query
 * functions to it. The master scan uses pQueryHandle, any other scan uses pSecQueryHandle.
 *
 * The scan stops early when a block belongs to a table that is not in the table hash map
 * or when loading a block fails. It leaves via longjmp(pRuntimeEnv->env, ...) when the
 * query is killed or when terrno is set once the loop has ended.
 *
 * @param pQInfo  query handle
 * @return        difference between the taosGetTimestampMs() readings taken at exit and entry
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // locate the per-table query state by the table id of the block
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    STableQueryInfo *pTableInfo = *ppTableInfo;
    pQuery->current = pTableInfo;

    // the table window must be ordered like the scan and lie inside the query window
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert((pTableInfo->win.skey <= pTableInfo->win.ekey) &&
             (pTableInfo->lastKey >= pTableInfo->win.skey) &&
             (pTableInfo->win.skey >= pQuery->window.skey && pTableInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert((pTableInfo->win.skey >= pTableInfo->win.ekey) &&
             (pTableInfo->lastKey <= pTableInfo->win.skey) &&
             (pTableInfo->win.skey <= pQuery->window.skey && pTableInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pTableInfo, &blockInfo);
    }

    uint32_t     status = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                         &pStatis, &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // block discarded: only move lastKey one step past the block edge selected by the scan order
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }

      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startMs;
}

4760 4761
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4762
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4763

4764
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
4765
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
4766
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
4767

H
Haojun Liao 已提交
4768 4769 4770
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
4771

H
Haojun Liao 已提交
4772
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
4773
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
4774
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
4775

4776
  STsdbQueryCond cond = {
4777
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
4778 4779
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
4780
      .numOfCols = pQuery->numOfCols,
4781
  };
4782

H
hjxilinx 已提交
4783
  // todo refactor
4784
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
4785 4786 4787 4788
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
4789

4790
  taosArrayPush(g1, &tx);
4791
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
4792

4793
  // include only current table
4794 4795 4796 4797
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
4798

H
Haojun Liao 已提交
4799
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
4800 4801
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
4802 4803 4804
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
4805

4806
  if (pRuntimeEnv->pTSBuf != NULL) {
H
Haojun Liao 已提交
4807 4808
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

4809
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
4810
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4811
      // failed to find data with the specified tag value and vnodeId
4812
      if (elem.vnode < 0) {
H
Haojun Liao 已提交
4813 4814 4815 4816 4817 4818
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

4819
        return false;
H
Haojun Liao 已提交
4820 4821
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4822 4823 4824 4825 4826 4827 4828 4829

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
4830 4831
      }
    } else {
H
Haojun Liao 已提交
4832
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4833
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
4834

H
Haojun Liao 已提交
4835
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4836
        // failed to find data with the specified tag value and vnodeId
H
Haojun Liao 已提交
4837
        if (elem1.vnode < 0) {
H
Haojun Liao 已提交
4838 4839 4840 4841 4842
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
4843

H
Haojun Liao 已提交
4844
          return false;
H
Haojun Liao 已提交
4845 4846
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4847 4848 4849 4850 4851
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
4852
        }
H
Haojun Liao 已提交
4853

H
Haojun Liao 已提交
4854 4855
      } else {
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
H
Haojun Liao 已提交
4856
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4857 4858 4859 4860 4861
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
4862
      }
4863 4864
    }
  }
4865

4866
  initCtxOutputBuf(pRuntimeEnv);
4867 4868 4869 4870 4871 4872 4873 4874 4875 4876
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4877
static void sequentialTableProcess(SQInfo *pQInfo) {
4878
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4879
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4880
  setQueryStatus(pQuery, QUERY_COMPLETED);
4881

H
Haojun Liao 已提交
4882
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4883

H
Haojun Liao 已提交
4884
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4885 4886
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4887

4888
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4889
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4890

S
TD-1057  
Shengliang Guan 已提交
4891
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4892
             numOfGroups, group);
H
Haojun Liao 已提交
4893 4894 4895 4896 4897 4898 4899

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4900 4901
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4902 4903 4904
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4905

H
Haojun Liao 已提交
4906 4907 4908 4909 4910 4911 4912
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4913

4914
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4915
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4916
      } else {
H
Haojun Liao 已提交
4917
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4918
      }
B
Bomin Zhang 已提交
4919 4920 4921 4922 4923 4924

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4925

H
Haojun Liao 已提交
4926
      initCtxOutputBuf(pRuntimeEnv);
4927

4928
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4929
      assert(taosArrayGetSize(s) >= 1);
4930

4931
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4932 4933 4934
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4935

dengyihao's avatar
dengyihao 已提交
4936
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4937

H
Haojun Liao 已提交
4938
      // here we simply set the first table as current table
4939 4940 4941
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4942
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4943

H
Haojun Liao 已提交
4944 4945 4946 4947 4948
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4949

H
Haojun Liao 已提交
4950 4951 4952 4953 4954
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4955 4956 4957 4958 4959 4960

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4961
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4962
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4963
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4964

S
TD-1057  
Shengliang Guan 已提交
4965
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4966 4967 4968 4969 4970 4971 4972

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4973 4974
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4987
      // no need to update the lastkey for each table
4988
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4989

B
Bomin Zhang 已提交
4990 4991
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4992 4993 4994
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4995

4996
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4997 4998
      assert(taosArrayGetSize(s) >= 1);

4999
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5000 5001 5002 5003 5004 5005 5006 5007

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
5008
      taosArrayDestroy(s);
5009 5010 5011 5012 5013
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5014
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
5015 5016 5017

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5018
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes));
5019 5020 5021
        }
      }

5022
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5023 5024 5025 5026 5027 5028 5029
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5030
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5031 5032 5033 5034 5035 5036

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5037 5038 5039
    }
  } else {
    /*
5040
     * 1. super table projection query, 2. ts-comp query
5041 5042 5043
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5044
    if (pQInfo->groupIndex > 0) {
5045
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5046
      pQuery->rec.total += pQuery->rec.rows;
5047

5048
      if (pQuery->rec.rows > 0) {
5049 5050 5051
        return;
      }
    }
5052

5053
    // all data have returned already
5054
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5055 5056
      return;
    }
5057

5058 5059
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5060

H
Haojun Liao 已提交
5061
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5062 5063
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5064

5065
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5066
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5067
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5068
      }
5069

5070
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5071
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5072
        pQInfo->tableIndex++;
5073 5074
        continue;
      }
5075

H
hjxilinx 已提交
5076
      // TODO handle the limit offset problem
5077
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5078 5079
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5080 5081 5082
          continue;
        }
      }
5083

5084
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5085
      skipResults(pRuntimeEnv);
5086

5087
      // the limitation of output result is reached, set the query completed
5088
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5089
        SET_STABLE_QUERY_OVER(pQInfo);
5090 5091
        break;
      }
5092

5093 5094
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5095

5096
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5097 5098 5099 5100 5101 5102
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5103
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5104

H
Haojun Liao 已提交
5105
        STableIdInfo tidInfo = {0};
5106

H
Haojun Liao 已提交
5107 5108 5109
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5110
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5111 5112
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5113
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5114
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5115 5116
          break;
        }
5117

H
Haojun Liao 已提交
5118 5119 5120 5121
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5122
      } else {
5123
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5124 5125
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5126 5127
          continue;
        } else {
5128 5129 5130
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5131 5132 5133
        }
      }
    }
H
Haojun Liao 已提交
5134

5135
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5136 5137
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5138
  }
5139

5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5152
    finalizeQueryResult(pRuntimeEnv);
5153
  }
5154

5155 5156 5157
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5158

5159
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5160 5161
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5162
      pQuery->limit.offset);
5163 5164
}

5165 5166 5167 5168
/**
 * Switch the runtime environment from the master scan to the reverse scan.
 *
 * Sets the reverse-scan flag, swaps the query window bounds, flips the query order (and
 * copies it to the ts buffer cursor when a ts buffer exists), switches the context order,
 * disables functions for the reverse scan, sets up the reverse query range, and opens a
 * fresh secondary query handle over all table groups.
 * Leaves via longjmp(pRuntimeEnv->env, terrno) when the secondary handle cannot be created.
 *
 * @param pQInfo  query handle
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // the condition is built after the swap/switch above, so it describes the reverse scan
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer so it never dangles while the calls below run
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5202 5203 5204 5205
/**
 * Switch the runtime environment back to the master scan after a reverse scan:
 * swap the query window bounds back, flip the ts buffer cursor order (when a ts buffer
 * exists), switch the context order and set the master-scan flag.
 *
 * NOTE(review): pQuery->order.order is not switched here although doSaveContext switches
 * it - confirm whether that is intentional.
 *
 * @param pQInfo  query handle
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  // undo the window swap done for the reverse scan
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5216 5217 5218
/**
 * Close every time window result once a scan has finished.
 *
 * For an interval query the window results of each table in each table group are closed;
 * otherwise the single window result set kept in the runtime environment is closed.
 *
 * @param pQInfo  query handle
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {  // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  // the counters are size_t to match the bounds they are compared against
  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (size_t i = 0; i < numOfGroup; ++i) {
    SArray *group = GET_TABLEGROUP(pQInfo, i);

    size_t num = taosArrayGetSize(group);
    for (size_t j = 0; j < num; ++j) {
      STableQueryInfo* item = taosArrayGetP(group, j);
      closeAllTimeWindow(&item->windowResInfo);
    }
  }
}

/*
 * Abort the multi-table query when an error code is recorded on pQInfo or the query has
 * been killed: log, finalize the results and longjmp to pQInfo->runtimeEnv.env.
 * Returns normally otherwise.
 *
 * NOTE(review): the longjmp value is always TSDB_CODE_TSC_QUERY_CANCELLED, even when
 * pQInfo->code holds a different error - confirm this is intended.
 */
static void abortMultiTableQueryOnFailure(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_SUCCESS && !IS_QUERY_KILLED(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
  finalizeQueryResult(&pQInfo->runtimeEnv); // clean up allocated resource during query
  longjmp(pQInfo->runtimeEnv.env, TSDB_CODE_TSC_QUERY_CANCELLED);
}

/**
 * Execute a query over multiple tables: master scan, optional reverse scan, then copy the
 * results into the output buffer.
 *
 * When pQInfo->groupIndex is already positive, no scan is performed and only the pending
 * results are copied out. The function leaves via longjmp when the query is killed or an
 * error code has been recorded on pQInfo after either scan.
 *
 * @param pQInfo  query handle
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * groupIndex > 0 means the scan has been completed in an earlier call,
     * only the remaining data have to be copied into the output buffer
     */
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsedMs = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsedMs);

  // query error occurred or query is killed, abort current execution
  abortMultiTableQueryOnFailure(pQInfo);

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsedMs = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsedMs);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  abortMultiTableQueryOnFailure(pQInfo);

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery))) {  // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5315
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5316
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5317

H
hjxilinx 已提交
5318
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5319
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5320 5321
    return;
  }
5322

H
hjxilinx 已提交
5323
  pQuery->current = pTableInfo;  // set current query table info
5324

5325
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5326
  finalizeQueryResult(pRuntimeEnv);
5327

H
Haojun Liao 已提交
5328
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5329 5330
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5331
  }
5332

H
Haojun Liao 已提交
5333
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5334
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5335

5336
  skipResults(pRuntimeEnv);
5337
  limitResults(pRuntimeEnv);
5338 5339
}

H
hjxilinx 已提交
5340
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  /*
   * Run a multi-row-output query against one table. The table is scanned
   * repeatedly until at least one row survives the offset or the query is
   * completed; afterwards the limit is applied and, on completion, the last
   * scanned key of the table is recorded in pQInfo->arrTableIdInfo.
   */
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ   = pEnv->pQuery;

  pQ->current = pTableInfo;

  // the output buffers of a ts_comp query must not be re-initialized
  if (!isTSCompQuery(pQ)) {
    resetCtxOutputBuf(pEnv);
  }

  // without filter conditions the offset can be consumed without loading the data blocks
  skipBlocks(pEnv);
  if (pQ->numOfFilterCols == 0 && pQ->limit.offset > 0) {
    setQueryStatus(pQ, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQ->current->lastKey);
    finalizeQueryResult(pEnv);

    pQ->rec.rows = getNumOfResult(pEnv);

    // with filters present the offset has to be applied to the filtered rows
    const bool offsetOnFilteredRows = (pQ->limit.offset > 0) && (pQ->numOfFilterCols > 0);
    if (offsetOnFilteredRows && pQ->rec.rows > 0) {
      skipResults(pEnv);
    }

    // stop once rows are available, or once there is nothing left to scan
    if (pQ->rec.rows > 0) {
      break;
    }

    if (Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQ->limit.offset, pQ->current->lastKey, pQ->current->win.ekey);

    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQ->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQ->current->lastKey, pQ->window.ekey);
  } else if (Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED)) {
    // remember how far this table has been scanned
    STableId    *pId = TSDB_TABLEID(pQ->current->pTable);
    STableIdInfo tidInfo;

    tidInfo.uid = pId->uid;
    tidInfo.tid = pId->tid;
    tidInfo.key = pQ->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQ)) {
    assert(pQ->rec.rows <= pQ->rec.capacity);
  }
}

H
Haojun Liao 已提交
5400
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  /*
   * Scan the current table from `start` until the query is completed or the
   * result buffer is full. After every scan pass, closed time windows are
   * dropped to consume the pending offset when no fill is requested.
   */
  SQuery *pQ = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQ->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // without interpolation the skipped records can simply be thrown away
    // todo handle offset, in case of top/bottom interval query
    const bool filteredOrJoined = (pQ->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    if (filteredOrJoined && pQ->limit.offset > 0 && pQ->fillType == TSDB_FILL_NONE) {
      int32_t closed  = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t dropped = (int32_t)(MIN(closed, pQ->limit.offset));

      // drop at most `offset` closed windows and reduce the offset accordingly
      clearFirstNTimeWindow(pRuntimeEnv, dropped);
      pQ->limit.offset -= dropped;
    }
  } while (!Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5427
// handle time interval query on table
H
hjxilinx 已提交
5428
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5429 5430
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5431 5432
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5433

H
Haojun Liao 已提交
5434
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5435
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5436

5437
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5438
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5439
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5440 5441 5442 5443
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5444
  while (1) {
H
Haojun Liao 已提交
5445
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5446

H
Haojun Liao 已提交
5447
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5448
      pQInfo->groupIndex = 0;  // always start from 0
5449
      pQuery->rec.rows = 0;
5450
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5451

5452
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5453
    }
5454

5455
    // the offset is handled at prepare stage if no interpolation involved
5456
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5457
      limitResults(pRuntimeEnv);
5458 5459
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5460
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5461
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5462
      numOfFilled = 0;
5463

H
Haojun Liao 已提交
5464
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5465
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5466
        limitResults(pRuntimeEnv);
5467 5468
        break;
      }
5469

5470
      // no result generated yet, continue retrieve data
5471
      pQuery->rec.rows = 0;
5472 5473
    }
  }
5474

5475
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5476
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5477
    pQInfo->groupIndex = 0;
5478
    pQuery->rec.rows = 0;
5479
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5480
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5481 5482 5483
  }
}

5484 5485 5486 5487
static void tableQueryImpl(SQInfo *pQInfo) {
  /*
   * Entry point for a query on a single table. Results left over from a
   * previous invocation are returned first; otherwise the table is processed
   * by the handler that matches the query type, and the elapsed time is
   * added to the runtime summary.
   */
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ   = pEnv->pQuery;

  if (queryHasRemainResForTableQuery(pEnv)) {
    if (pQ->fillType != TSDB_FILL_NONE) {
      /*
       * Rows are still pending because of result interpolation: keep producing
       * them here instead of launching the retrieve procedure for new results.
       */
      int32_t numOfFilled = 0;
      pQ->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQ->sdata, &numOfFilled);

      if (pQ->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQ->rec.rows, pQ->rec.total);
      return;
    }

    pQ->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQ->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQ->rec.rows, pQ->rec.total);

        // no window results remain
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQ->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQ->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTable = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQ) || pEnv->groupbyNormalCol) {
    tableIntervalProcess(pQInfo, pTable);    // interval (down sampling operation)
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {
    assert(pQ->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable); // diff/add/multiply/subtract/division
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5549
static void stableQueryImpl(SQInfo *pQInfo) {
  /*
   * Entry point for a super-table query: dispatches either to the
   * multi-table processor or to the sequential per-table processor and adds
   * the elapsed time to the runtime summary.
   */
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ   = pEnv->pQuery;

  pQ->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  // interval queries, and plain fixed-output queries, go through the multi-table processor
  bool viaMultiTable = QUERY_IS_INTERVAL_QUERY(pQ);
  if (!viaMultiTable) {
    viaMultiTable = isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQ)) && (!pEnv->groupbyNormalCol);
  }

  if (viaMultiTable) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQ->checkBuffer == 1 && pQ->interval.interval == 0) || isPointInterpoQuery(pQ) ||
            isFirstLastRowQuery(pQ) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5570
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  /*
   * Locate the source column referenced by an expression.
   *
   * Returns the position in pTagCols for a tag column, the position in
   * pQueryMsg->colList for a normal column, TSDB_TBNAME_COLUMN_INDEX for the
   * table-name pseudo tag, or TSDB_UD_COLUMN_INDEX for a user-defined column.
   * A column id that cannot be found trips assert(0) and yields -1.
   */
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t k = 0; k < pQueryMsg->numOfTags; ++k) {
      if (pTagCols[k].colId == pExprMsg->colInfo.colId) {
        return k;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {
    // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t k = 0; k < pQueryMsg->numOfCols; ++k) {
      if (pQueryMsg->colList[k].colId == pExprMsg->colInfo.colId) {
        return k;
      }
    }
  }

  // the referenced column id does not exist in the source lists
  assert(0);
  return -1;
}

5601 5602 5603
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  // the resolved index has to lie below the number of source columns or below the number of tags
  const int32_t idx = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (idx < pQueryMsg->numOfCols) {
    return true;
  }

  return idx < pQueryMsg->numOfTags;
}

5606
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  /*
   * Sanity check of the scalar header fields of a query message. Every
   * rejected field is logged; returns true only when all checks pass.
   */

  // the interval must not be negative
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
    return false;
  }

  // at least one table has to be queried
  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  // the number of group-by columns must not be negative
  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  // the number of output columns has to be within [1, TSDB_MAX_COLUMNS]
  if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/*
 * Check the number of source columns/tags carried by a query message.
 *
 * Returns false when either count is negative or the total exceeds
 * TSDB_MAX_COLUMNS. When the message carries no source column and no tag at
 * all, every output expression has to be one that works without them
 * (TSDB_FUNC_TAGPRJ, or TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT on the table-name
 * pseudo column); otherwise false is returned.
 *
 * pExprMsg must hold pQueryMsg->numOfOutput entries.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  // Both counts are read from the message. Range-check each of them before
  // adding them up, so that the sum cannot overflow a signed integer.
  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 ||
      pQueryMsg->numOfCols > TSDB_MAX_COLUMNS || pQueryMsg->numOfTags > TSDB_MAX_COLUMNS ||
      pQueryMsg->numOfCols + pQueryMsg->numOfTags > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;
  if (numOfTotal > 0) {
    return true;
  }

  // no source column at all: only expressions that do not read column data are acceptable
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    bool onTbname = (pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
        (pFuncMsg->functionId == TSDB_FUNC_COUNT && onTbname)) {
      continue;
    }

    qError("qmsg:%p no source column or tag is specified, but output column %d (functionId:%d) requires one",
           pQueryMsg, i, pFuncMsg->functionId);
    return false;
  }

  return true;
}

5652
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  /*
   * Read pQueryMsg->numOfTables STableIdInfo records starting at pMsg,
   * convert each of them in place with htonl/htobe64 and append a copy to a
   * newly created array stored in *pTableIdList.
   *
   * Returns the position in the message right behind the last record.
   */
  assert(pQueryMsg->numOfTables > 0);

  const int32_t numOfTables = pQueryMsg->numOfTables;
  *pTableIdList = taosArrayInit(numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t k = 0; k < numOfTables; ++k, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  // pEntry now points just past the table id records
  return (char *)pEntry;
}
5670

5671
/**
H
hjxilinx 已提交
5672
 * pQueryMsg->head has been converted before this function is called.
5673
 *
H
hjxilinx 已提交
5674
 * @param pQueryMsg
5675 5676 5677 5678
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5679
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5680
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5681 5682
  int32_t code = TSDB_CODE_SUCCESS;

5683 5684 5685 5686
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5687 5688 5689 5690 5691 5692
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5693 5694
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5695

5696 5697
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5698
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5699
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5700 5701

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5702
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5703
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5704 5705 5706
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5707
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5708
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5709
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5710

5711
  // query msg safety check
5712
  if (!validateQueryMsg(pQueryMsg)) {
5713 5714
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5715 5716
  }

H
hjxilinx 已提交
5717 5718
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5719 5720
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5721
    pColInfo->colId = htons(pColInfo->colId);
5722
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5723 5724
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5725

H
hjxilinx 已提交
5726
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5727

H
hjxilinx 已提交
5728
    int32_t numOfFilters = pColInfo->numOfFilters;
5729
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5730
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5731 5732 5733 5734
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5735 5736 5737
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5738
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5739

5740 5741
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5742 5743 5744

      pMsg += sizeof(SColumnFilterInfo);

5745 5746
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5747

5748
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5749 5750 5751 5752 5753
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5754
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5755
        pMsg += (pColFilter->len + 1);
5756
      } else {
5757 5758
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5759 5760
      }

5761 5762
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5763 5764 5765
    }
  }

5766
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5767 5768 5769 5770 5771
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5772
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5773

5774
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5775
    (*pExpr)[i] = pExprMsg;
5776

5777
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5778 5779 5780 5781
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5782

5783
    pMsg += sizeof(SSqlFuncMsg);
5784 5785

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5786
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5787 5788 5789 5790
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5791
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5792 5793 5794 5795 5796
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5797 5798
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5799
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5800 5801
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5802 5803
      }
    } else {
5804
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5805
//        return TSDB_CODE_QRY_INVALID_MSG;
5806
//      }
5807 5808
    }

5809
    pExprMsg = (SSqlFuncMsg *)pMsg;
5810
  }
5811

5812
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5813
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5814
    goto _cleanup;
5815
  }
5816

H
hjxilinx 已提交
5817
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5818

H
hjxilinx 已提交
5819
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5820
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5821 5822 5823 5824
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5825 5826 5827

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5828
      pMsg += sizeof((*groupbyCols)[i].colId);
5829 5830

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5831 5832
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5833
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5834 5835 5836 5837 5838
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5839

H
hjxilinx 已提交
5840 5841
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5842 5843
  }

5844 5845
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5846
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5847 5848

    int64_t *v = (int64_t *)pMsg;
5849
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5850 5851
      v[i] = htobe64(v[i]);
    }
5852

5853
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5854
  }
5855

5856 5857
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5858 5859 5860 5861 5862
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5863 5864
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5865

5866 5867 5868 5869
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5870

5871
      (*tagCols)[i] = *pTagCol;
5872
      pMsg += sizeof(SColumnInfo);
5873
    }
H
hjxilinx 已提交
5874
  }
5875

5876 5877 5878
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5879 5880 5881 5882 5883 5884

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5885 5886 5887
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5888

weixin_48148422's avatar
weixin_48148422 已提交
5889
  if (*pMsg != 0) {
5890
    size_t len = strlen(pMsg) + 1;
5891

5892
    *tbnameCond = malloc(len);
5893 5894 5895 5896 5897
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5898
    strcpy(*tbnameCond, pMsg);
5899
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5900
  }
5901

5902
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5903 5904
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5905
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5906
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5907 5908

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5909 5910

_cleanup:
S
Shengliang Guan 已提交
5911
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5912 5913
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5914 5915 5916 5917
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5918 5919

  return code;
5920 5921
}

H
hjxilinx 已提交
5922
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  /*
   * Build the expression tree of an arithmetic expression from its serialized
   * form, which is carried in the first argument of the expression, and attach
   * the tree to pArithExprInfo->pExpr.
   *
   * Returns TSDB_CODE_SUCCESS, the code caught from the parser, or
   * TSDB_CODE_QRY_APP_ERROR when no tree was produced.
   */
  char *exprStr = pArithExprInfo->base.arg[0].argValue.pz;  // only read below, never modified

  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, exprStr);

  tExprNode* pExprNode = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pExprNode = exprTreeFromBinary(exprStr, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, exprStr, tstrerror(code));
    return code;
  } END_TRY

  if (pExprNode != NULL) {
    pArithExprInfo->pExpr = pExprNode;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, exprStr);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5943
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5944 5945
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5946
  int32_t code = TSDB_CODE_SUCCESS;
5947

H
Haojun Liao 已提交
5948
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5949
  if (pExprs == NULL) {
5950
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5951 5952 5953 5954 5955
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5956
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5957
    pExprs[i].base = *pExprMsg[i];
5958
    pExprs[i].bytes = 0;
5959 5960 5961 5962

    int16_t type = 0;
    int16_t bytes = 0;

5963
    // parse the arithmetic expression
5964
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5965
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5966

5967
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5968
        taosTFree(pExprs);
5969
        return code;
5970 5971
      }

5972
      type  = TSDB_DATA_TYPE_DOUBLE;
5973
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5974
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5975
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5976
      type = s.type;
H
Haojun Liao 已提交
5977
      bytes = s.bytes;
5978 5979
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5980 5981
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5982 5983
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5984 5985 5986 5987 5988

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5989
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5990
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5991

dengyihao's avatar
dengyihao 已提交
5992
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5993 5994 5995 5996
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5997
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5998

H
Haojun Liao 已提交
5999 6000 6001
        type  = s.type;
        bytes = s.bytes;
      }
6002 6003
    }

S
TD-1057  
Shengliang Guan 已提交
6004
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6005
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6006
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
6007
      taosTFree(pExprs);
6008
      return TSDB_CODE_QRY_INVALID_MSG;
6009 6010
    }

6011
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6012
      tagLen += pExprs[i].bytes;
6013
    }
6014
    assert(isValidDataType(pExprs[i].type));
6015 6016 6017
  }

  // TODO refactor
6018
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6019 6020
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6021

6022
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6023
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6024 6025 6026 6027 6028 6029 6030 6031 6032
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6033 6034 6035
    }
  }

6036
  *pExprInfo = pExprs;
6037 6038 6039
  return TSDB_CODE_SUCCESS;
}

6040
/*
 * Create the group-by expression of a query from the group-by column list of
 * its message.
 *
 * pColIndex has to hold pQueryMsg->numOfGroupCols entries; each of them is
 * copied into the columnInfo array of the result.
 *
 * Returns NULL without touching *code when the query has no group-by column.
 * Returns NULL and sets *code to TSDB_CODE_QRY_OUT_OF_MEMORY when an
 * allocation fails. Otherwise the new expression is returned and *code is
 * left unchanged.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not hand out an expression without its column list
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6064
/*
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo for every column in pQuery->colList
 * that carries at least one filter, each with a resolved filter callback per filter element.
 *
 * pQInfo  only used as an identifier in log messages
 * pQuery  query whose colList is scanned; numOfFilterCols and pFilterInfo are written
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails, or
 * TSDB_CODE_QRY_INVALID_MSG when a filter has no usable relation operator or data type.
 * On failure the partially built pFilterInfo stays attached to pQuery for the caller to release.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array, so that cleanup code iterating over
    // numOfFilterCols entries never indexes a NULL pFilterInfo
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      // shallow copy of the column description (the filters pointer is shared with colList[i])
      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // two-sided range: the slot in rangeFilterArray depends on which bounds are inclusive
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];  // >= and <=
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];  // >= and <
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];  // > and <=
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];  // > and <
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            // a one-sided filter must not carry a second relation operator
            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

6157
/*
 * Resolve, for every output expression of the query, the position of the referenced column:
 * normal columns are looked up by colId in pQuery->colList, tag columns in pQuery->tagColList,
 * and the matching position is stored in the expression's colInfo.colIndex.
 * Arithmetic expressions and user-defined constant columns are left untouched.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before dereferencing it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column referenced by an expression must be part of the queried column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is the only tag-side column allowed to be missing from tagColList
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6194 6195
// forward declaration: createQInfoImpl and initQInfo release a QInfo through freeQInfo on their
// error paths, but the function itself is defined further down in this file
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6196 6197 6198
/*
 * Choose the capacity (in rows) of the per-column output buffers and the row threshold stored
 * next to it in pQuery->rec.
 *
 * Projection queries get enough rows to fill RESULT_MSG_MIN_SIZE bytes, but never fewer than
 * RESULT_MSG_MIN_ROWS rows; every other query uses a fixed, smaller buffer of 4096 rows.
 * The threshold is always RESULT_THRESHOLD_RATIO of the capacity.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  // in case of non-prj query, a smaller output buffer will be used.
  int32_t capacity = 4096;

  if (isProjQuery(pQuery)) {
    capacity = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    if (capacity < RESULT_MSG_MIN_ROWS) {
      capacity = RESULT_MSG_MIN_ROWS;
    }
  }

  pQuery->rec.capacity  = capacity;
  pQuery->rec.threshold = (int32_t)(capacity * RESULT_THRESHOLD_RATIO);
}

6215 6216
/*
 * Allocate and populate the SQInfo/SQuery pair for one query.
 *
 * pQueryMsg        decoded query message (columns, limit/offset, order, interval, fill, window)
 * pGroupbyExpr     group-by descriptor, may be NULL
 * pExprs           array of pQueryMsg->numOfOutput output expressions
 * pTableGroupInfo  tables to query, grouped; copied by value into the new SQInfo
 * pTagCols         tag column list, stored as pQuery->tagColList
 * stableQuery      forwarded to changeExecuteScanOrder
 *
 * Returns the new SQInfo, or NULL on failure. In both cases pGroupbyExpr, pExprs, pTagCols and
 * the content of pTableGroupInfo are consumed: on failure they are released here, either
 * directly (when the SQInfo/SQuery allocation itself failed) or through freeQInfo once they
 * have been attached to the query. The caller must not free them again.
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  // from here on everything attached to pQuery is released by freeQInfo (label _cleanup)
  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // colList holds SColumnInfo elements, so size the allocation by that type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the filters get their own copy, the message keeps its own
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    assert(pExprs[col].interBytes >= pExprs[col].bytes);

    // allocate additional memory for interResults that are usually larger then final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);

    // both containers are filled in the group loop below, so neither may be missing
    if (pQInfo->tableqinfoGroupInfo.pGroupList == NULL || pQInfo->tableqinfoGroupInfo.map == NULL) {
      goto _cleanup;
    }
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);

  // one contiguous buffer holding the STableQueryInfo of every table of the query
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  // running position of the next free STableQueryInfo slot inside pQInfo->pBuf
  int32_t index = 0;

  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    // p1 is owned by the group list from now on, and released together with it
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // every table starts scanning from its own last key
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // no SQInfo exists yet, so the table group has not been copied into it and is released here
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // no SQuery exists, so the arguments were never attached to it and are released directly
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  taosTFree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  taosTFree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6394
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6395 6396 6397 6398
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6399

H
hjxilinx 已提交
6400 6401 6402 6403
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6404
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6405 6406 6407
  return (sig == (uint64_t)pQInfo);
}

6408
/*
 * Second stage of query handle construction: set the timestamp precision, detect queries that
 * cannot produce any result, build the optional ts buffer and run doInitQInfo.
 *
 * pQueryMsg  decoded query message; tsLen/tsOffset/tsNumOfBlocks/tsOrder describe the ts block
 * tsdb       storage handle, used for the precision and forwarded to doInitQInfo
 * vgId       forwarded to tsBufCreateFromCompBlocks and doInitQInfo
 * pQInfo     query handle created by createQInfoImpl
 * isSTable   forwarded to doInitQInfo
 *
 * Returns TSDB_CODE_SUCCESS (also when the query is completed right away because its time range
 * is empty or no table qualifies), or the error code of doInitQInfo. In the error case pQInfo
 * has already been released by freeQInfo and must not be used by the caller.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is only consumed by doInitQInfo, so it is built after the early exits above;
  // creating it first would leave it unreferenced whenever one of them returns.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6452
/*
 * Release an array of numOfFilters column filters: the pz payload of every entry flagged as
 * filterstr first, then the array itself. Nothing is done for a NULL array or a zero count.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
    if (pFilter == NULL || numOfFilters == 0) {
      return;
    }

    int32_t idx = 0;
    while (idx < numOfFilters) {
      SColumnFilterInfo* pCur = &pFilter[idx];

      // only entries marked as filterstr have pz released
      if (pCur->filterstr) {
        free((void*)(pCur->pz));
      }

      idx += 1;
    }

    free(pFilter);
}

H
Haojun Liao 已提交
6466 6467
/*
 * Tear down the per-query table bookkeeping: destroy every STableQueryInfo referenced from the
 * group list, the per-group arrays, the group list itself and the hash map, then reset the
 * structure to its empty state (NULL pointers, zero tables).
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t numOfGroups = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < numOfGroups; ++g) {
      SArray *pTables = taosArrayGetP(pGroups, g);
      size_t  numOfItems = taosArrayGetSize(pTables);

      for (int32_t t = 0; t < numOfItems; ++t) {
        STableQueryInfo* pItem = taosArrayGetP(pTables, t);
        destroyTableQueryInfoImpl(pItem);
      }

      taosArrayDestroy(pTables);
    }
  }

  taosArrayDestroy(pGroups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
hjxilinx 已提交
6490 6491 6492 6493
/*
 * Release a query handle and everything it owns: the reserved query buffer, the runtime
 * environment, the SQuery with its result pages, fill values, filters, output expressions,
 * group-by descriptor, tag and column lists, the per-table query info, the table group and
 * finally the SQInfo itself.
 *
 * Handles that fail isValidQInfo (NULL or signature mismatch) are ignored. The function is also
 * used on partially constructed handles, so every member is expected to be either NULL/zero or
 * fully usable.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf((int32_t) pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        taosTFree(pQuery->sdata[col]);
      }
      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // numOfFilterCols is counted before pFilterInfo is allocated, so the array can still be
    // missing here when that allocation failed
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          taosTFree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo *pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  taosTFree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle so that a later isValidQInfo on the stale pointer is more likely to fail
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6565
/*
 * Compute the size in bytes of the result to send back.
 *
 * For every query except a ts-comp query with a positive row count, this is rowSize * (*numOfRows).
 * For a ts-comp query with a positive row count, the result is the file whose path is stored in
 * the first output column: its size is returned and also written to *numOfRows. When the file
 * cannot be inspected, 0 is returned and *numOfRows is left unchanged.
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
6586

H
hjxilinx 已提交
6587 6588 6589
/*
 * Copy the current result of the query into the response buffer and update the row accounting.
 *
 * pQInfo  query handle
 * data    destination buffer; its capacity is not visible here and is assumed to match what
 *         getResultSize reported for this result (to be confirmed at the call site)
 *
 * For a ts-comp query the result is the content of the temporary file whose path is stored in
 * the first output column; the file is read completely and then removed. For every other query
 * the rows are copied by doCopyQueryResultToMsg. Afterwards rec.total is advanced and the query
 * is marked QUERY_OVER when the limit has been reached.
 *
 * Always returns TSDB_CODE_SUCCESS.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // signed on purpose: lseek reports failure with -1, which must not turn into a read length
      int64_t s = lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, s);
      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        // read() may legally return fewer bytes than requested, so keep reading until the whole
        // file is copied, the file ends early, or a real error occurs
        int64_t numOfRead = 0;
        while (numOfRead < s) {
          ssize_t sz = read(fd, data + numOfRead, (size_t)(s - numOfRead));
          if (sz < 0 && errno == EINTR) {
            continue;
          }

          if (sz <= 0) {
            break;
          }

          numOfRead += sz;
        }

        if (numOfRead < s) {  // todo handle error
          qError("QInfo:%p failed to read ts-comp data, path:%s, expect:%"PRId64", read:%"PRId64, pQInfo,
                 pQuery->sdata[0]->data, s, numOfRead);
          assert(0);
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6639 6640 6641 6642 6643 6644 6645
// Manager object that owns a pool of query handles.
// NOTE(review): the code using this struct is outside this part of the file; the notes on
// vgId, closed and lock below are read from the field names only - confirm at the use sites.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vnode this manager belongs to - TODO confirm
  bool            closed;         // presumably set once the manager no longer accepts handles - TODO confirm
  pthread_mutex_t lock;           // presumably guards the fields above - TODO confirm
} SQueryMgmt;

6646
/*
 * Create a query handle from a query message.
 *
 * tsdb       storage handle the tables are looked up in, must not be NULL
 * vgId       forwarded to initQInfo
 * pQueryMsg  raw query message, must not be NULL; it is decoded in place by convertQueryMsg
 * pQInfo     receives the new handle on success, NULL on failure
 *
 * Steps: decode the message, build the output expressions and the group-by descriptor, resolve
 * the set of tables (single table, super table with tag condition, or explicit table id list),
 * reserve the query buffer, then build and initialize the handle.
 *
 * Returns TSDB_CODE_SUCCESS or an error code. All intermediate objects decoded from the message
 * are released here, and the column filters of pQueryMsg->colList are released in every case.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    // the table group has been populated above but no query handle will take it over
    tsdbDestroyTableGroup(&tableGroupInfo);
    goto _over;
  }

  // createQInfoImpl consumes pGroupbyExpr, pExprs, pTagColumnInfo and the table group, even when
  // it fails, so the local pointers are cleared to keep the cleanup below from freeing them again
  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);
  free(pExprs);
  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // when initQInfo fails it has already released the handle, but *pQInfo still holds the stale
  // address; never hand that back to the caller
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6777
/*
 * Public entry point to dispose of a query handle: logs the completion, prints the query cost
 * summary and releases the handle. Handles that fail isValidQInfo are silently ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);

    // print the query cost summary
    queryCostStatis(pQInfo);
    freeQInfo(pQInfo);
  }
}

6788 6789 6790 6791 6792 6793 6794 6795
/*
 * Mark the result of the query as ready and give up ownership of the handle.
 *
 * Under pQInfo->lock: dataReady becomes QUERY_RESULT_READY, the presence of a response context
 * is sampled, and the owner is reset to 0 (the calling thread must be the current owner).
 * After the lock is released the ready semaphore is posted.
 *
 * Returns true when a response context was registered at the time of the check.
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // clear qhandle owner, it must be in the secure area. other thread may run ahead before current, after it is
  // put into task to be executed.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6807
/**
 * Run one execution round of the query behind the handle on the calling thread.
 *
 * The calling thread first claims the handle by atomically swapping the owner
 * from 0 to its own thread id. If another thread already owns the handle the
 * code is set to TSDB_CODE_QRY_IN_EXEC and false is returned. In every other
 * case the return value is that of doBuildResCheck().
 *
 * A non-zero return from setjmp() on runtimeEnv.env is recorded as the query
 * code and ends the round.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t threadId = taosGetPthreadId();

  // claim the handle; a non-zero previous owner means it is already running
  int64_t curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId);
  if (curOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) curOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // an error or cancel during execution comes back here with a non-zero code,
  // which is recorded so it can be returned to the client
  int32_t ret = setjmp(pRuntimeEnv->env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // dispatch to the matching executor
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6863
/**
 * Wait until the current execution round of the query has produced its result.
 *
 * @param qinfo        query handle
 * @param buildRes     out: false when the query is already killed; true after
 *                     the "ready" semaphore has been acquired
 * @param pRspContext  unused by the current implementation
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 *         the code stored in the handle.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  (void)pRspContext;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // block until doBuildResCheck() posts the semaphore for this round
  tsem_wait(&pQInfo->ready);
  *buildRes = true;

  return pQInfo->code;
}
6906

6907
/**
 * Build the retrieve response for the current result batch.
 *
 * The response buffer is obtained from rpcMallocCont() and sized as
 * sizeof(SRetrieveTableRsp) + result size + one int32_t + one STableIdInfo per
 * entry of pQInfo->arrTableIdInfo.
 *
 * @param qinfo         query handle
 * @param pRsp          out: allocated response (NULL if the allocation failed)
 * @param contLen       out: length of the response content
 * @param continueExec  out: true when more results remain to be produced;
 *                      always false on an error return
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or the
 *         code stored in the handle.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    *continueExec = false;  // never leave the out-parameter undefined
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  size_t  size = getResultSize(pQInfo, &pQuery->rec.rows);

  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    *continueExec = false;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query; the elapsed time always is
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
  } else {
    (*pRsp)->offset = 0;
  }
  (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // nothing to dump (no rows or a failed query): mark the query as over
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    *continueExec = false;
    (*pRsp)->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6960

6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971
/**
 * Tell whether the query has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise 1
 *         when the query is killed or its status contains QUERY_OVER, else 0.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pInfo->runtimeEnv.pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6972
/**
 * Mark the query as killed and wait until no thread owns the handle any more.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 *         TSDB_CODE_SUCCESS once pQInfo->owner has been observed as 0.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pInfo);

  // Poll until the executing thread has stopped: the owner field of the handle
  // is reset as soon as the query stops.
  for (; pInfo->owner != 0; ) {
    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005
/**
 * Copy one tag value into the result buffer, or write the type's NULL marker
 * when no value is available.
 *
 * @param output  destination inside the result buffer
 * @param val     source value, may be NULL
 * @param type    column type; BINARY and NCHAR are handled as var-length data
 * @param bytes   size used for non var-length types
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  // var-length values carry their own total length; todo here stop will cause client crash
  size_t len;
  if (isVarLen) {
    len = varDataTLen(val);
  } else {
    len = bytes;
  }

  memcpy(output, val, len);
}

H
hjxilinx 已提交
7006 7007 7008
/**
 * Produce the result of a query that touches only tags / table names.
 *
 * Depending on the function id of the first select expression:
 *  - TSDB_FUNC_TID_TAG: one var-length record per table holding an int16 zero,
 *    the table uid, the table tid, the vgId and then the table name or the tag
 *    value;
 *  - TSDB_FUNC_COUNT ("count(tbname)"): a single row holding the table count;
 *  - otherwise: one row per table with the requested tag / tbname columns,
 *    honouring limit.offset and limit.limit.
 *
 * pQInfo->tableIndex is advanced for every visited table. On return
 * pQuery->rec.rows holds the number of rows written and the query status is
 * set to QUERY_COMPLETED. Nothing is done when there is no table group.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // prefer the type/size declared in the tag column list for the requested column
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The header fields are packed back to back behind the var-length prefix,
      // so they are not guaranteed to be naturally aligned. Copy them with
      // memcpy instead of storing through a casted pointer.
      int16_t zero = 0;
      memcpy(output, &zero, sizeof(zero));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // cap the number of rows by the buffer capacity and, if set, by the limit
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149
/**
 * Estimate the buffer needed by a query over numOfTables tables:
 * 1.5 * (sizeof(STableQueryInfo) + sizeof(SHashNode)) per table.
 * The per-table buffer consumed inside tsdb (STableCheckInfo) is not counted.
 */
static int64_t getQuerySupportBufSize(int32_t numOfTables) {
  size_t perTable = sizeof(STableQueryInfo) + sizeof(SHashNode);
  return perTable * 1.5 * numOfTables;
}

/**
 * Try to reserve query buffer for numOfTables tables from tsQueryBufferSize.
 *
 * - negative tsQueryBufferSize: no accounting, always succeeds;
 * - zero: query processing is disabled, always fails;
 * - positive: the estimate is subtracted with a compare-and-swap retry loop and
 *   the call fails once the remainder would become negative.
 *
 * @return TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER.
 */
int32_t checkForQueryBuf(int32_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  if (!(tsQueryBufferSize > 0)) {
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  for (;;) {
    int64_t avail  = tsQueryBufferSize;
    int64_t remain = avail - required;
    if (remain < 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry when another thread changed the budget in between
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, avail, remain) == avail) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

/**
 * Return the buffer estimate for numOfTables tables to tsQueryBufferSize.
 *
 * Nothing is done when tsQueryBufferSize is negative (no accounting). A value
 * of exactly 0 must still be credited: checkForQueryBuf() accepts a
 * reservation that leaves a remainder of 0, and skipping the release in that
 * state would leave the budget at 0 so every later reservation fails.
 *
 * NOTE(review): assumes this is only called for queries whose reservation in
 * checkForQueryBuf() succeeded — confirm against the callers.
 */
void releaseQueryBuf(int32_t numOfTables) {
  if (tsQueryBufferSize < 0) {
    return;
  }

  int64_t t = getQuerySupportBufSize(numOfTables);

  // give the reserved amount back to the shared budget
  atomic_add_fetch_64(&tsQueryBufferSize, t);
}

7181 7182 7183 7184 7185 7186 7187
/**
 * Return the response context currently stored in the query handle.
 * The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* pContext = pInfo->rspContext;
  return pContext;
}

7188 7189 7190 7191 7192 7193 7194
/**
 * Free callback handed to the handle cache: kills the query stored in the
 * cache slot and then destroys it. NULL slots and empty slots are ignored.
 *
 * @param qhandle  pointer to the slot that holds the query handle
 */
void freeqinfoFn(void *qhandle) {
  void** pSlot = qhandle;

  if (pSlot != NULL && *pSlot != NULL) {
    qKillQuery(*pSlot);
    qDestroyQueryInfo(*pSlot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7199
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7200 7201 7202 7203

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7204
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7205 7206 7207 7208
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7209

S
TD-1530  
Shengliang Guan 已提交
7210
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7211 7212 7213 7214
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7215 7216

  qDebug("vgId:%d, open querymgmt success", vgId);
7217
  return pQueryMgmt;
7218 7219
}

H
Haojun Liao 已提交
7220
/**
 * Callback passed to taosCacheRefresh(): kills the query whose handle is stored
 * in the given slot.
 *
 * @param handle  pointer to the slot that holds the query handle
 */
static void queryMgmtKillQueryFn(void* handle) {
  void* qinfo = *(void**)handle;
  qKillQuery(qinfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7226 7227 7228 7229 7230 7231 7232
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7233
//  pthread_mutex_lock(&pQueryMgmt->lock);
7234
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7235
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7236

H
Haojun Liao 已提交
7237
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254
}

/**
 * Tear down a query manager that has already been flagged as closed: the
 * handle cache is cleaned up, the mutex destroyed and the manager freed.
 * A NULL manager is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  // keep the id for the final log line; the manager is gone by then
  int32_t vgId = pQueryMgmt->vgId;

  assert(pQueryMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  taosTFree(pQueryMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

7260
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7261
  if (pMgmt == NULL) {
7262
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7263 7264 7265
    return NULL;
  }

7266
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7267

7268 7269
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7270
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7271
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7272 7273 7274
    return NULL;
  }

H
Haojun Liao 已提交
7275
//  pthread_mutex_lock(&pQueryMgmt->lock);
7276
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7277
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7278
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7279
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7280 7281
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7282 7283
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7284
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7285 7286 7287 7288 7289

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7290
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7291 7292
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7293 7294 7295 7296 7297 7298 7299
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7300 7301 7302
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7303 7304
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7305
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7306
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7307 7308 7309 7310 7311 7312
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7313
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7314 7315 7316 7317 7318
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7319
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7320 7321 7322
  return 0;
}

7323