qExecutor.c 244.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
// (1u<<12) - 1 == 4095; going by its name, the cap on rows held in one result-buffer page
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
// true when status word p has at least one of the bits in s set
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
// true when the query order maps to the ascending forward step
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

// test / set the scan flag of a runtime environment
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

// recover the enclosing SQInfo from a pointer to its runtimeEnv member
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

// position of the index-th element counted from (query)->pos in steps of `step`
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
// flip n between TSDB_ORDER_ASC and TSDB_ORDER_DESC (assigns to n)
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

// zero-valued SDataBlockInfo compound literal
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey pair of _src into _dst (both are struct lvalues, not pointers).
 *
 * The expansion deliberately ends without a semicolon so that the caller's own
 * `;` completes the statement; this keeps the macro usable as the body of an
 * unbraced if/else. Arguments are parenthesized so that expressions such as
 * `*p` or `cond ? a : b` expand correctly.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* The query is over:
   * 1. used by queries that produce a single result row, e.g. count/sum/first/last/avg, etc.
   * 2. also set once all data within the queried time window has been processed
   */
  QUERY_COMPLETED = 0x4u,

  /* Set while the results have not yet been completely returned to the client;
   * usually used for interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
77 78

// outcome codes for a timestamp-join comparison
enum {
  TS_JOIN_TS_EQUAL       = 0,  // timestamps are equal
  TS_JOIN_TS_NOT_EQUALS  = 1,  // timestamps differ
  TS_JOIN_TAG_NOT_EQUALS = 2,  // tags differ
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Fault-injection allocators, compiled out by default. When enabled they make
 * malloc/calloc fail for one in a thousand rand() values and realloc fail for
 * two in five, so that out-of-memory paths get exercised.
 */
static UNUSED_FUNC void *u_malloc (size_t size) {
  uint32_t dice = rand();
  return (dice % 1000 <= 0) ? NULL : malloc(size);
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t dice = rand();
  return (dice % 1000 <= 0) ? NULL : calloc(num, size);
}

static UNUSED_FUNC void* u_realloc(void* p, size_t size) {
  uint32_t dice = rand();
  return (dice % 5 <= 1) ? NULL : realloc(p, size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Advance the time window *tw to the next window in query order.
 *
 * For every interval unit other than 'n' and 'y' the start key moves by one
 * sliding step (negated for descending order) and the end key is
 * skey + interval - 1.
 *
 * For 'n' and 'y' the window is moved in calendar months ('y' counts as 12
 * months): the start key is converted to seconds, broken down with
 * localtime_r(), shifted by `interval` months in the query direction and
 * converted back with mktime(). The end key is the start of the following
 * window minus one tick.
 *
 * @param pQuery query descriptor; order, interval and precision are read
 * @param tw     [in/out] window to advance
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to seconds: /1000 once, and once more for microsecond precision
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before multiplying: time_t/long may be 32 bits wide and would overflow
  tw->skey = (int64_t)mktime(&tm) * 1000L;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000L;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  // ekey is inclusive: one tick before the start of the following window
  tw->ekey -= 1;
}

#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))
H
Haojun Liao 已提交
174

175 176
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

H
hjxilinx 已提交
181
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
182
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
183 184
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
187
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
188

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
197

198
/*
 * Evaluate all column filters of the query against the row at elemPos.
 *
 * A row qualifies only if every filtered column accepts it; within one column
 * it is enough that any single filter accepts the value.
 *
 * NULL handling per filter:
 *   - value is NULL:     only an isNull_filter accepts it, any other filter is skipped
 *   - value is not NULL: a notNull_filter accepts it, an isNull_filter is skipped,
 *                        every other filter is evaluated through its fp callback
 *
 * @param pQuery  query whose pFilterInfo/numOfFilterCols are evaluated
 * @param elemPos row index into each filter column's pData
 * @return true when the row passes all column filters
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;

    // the element and its type are the same for every filter of this column, so test for NULL once
    bool isnull = isNull(pElem, pFilterInfo->info.type);

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      if (isnull) {
        if (pFilterElem->fp == isNull_filter) {
          qualified = true;
          break;
        }
        continue;
      }

      if (pFilterElem->fp == notNull_filter) {
        qualified = true;
        break;
      }

      if (pFilterElem->fp == isNull_filter) {
        continue;
      }

      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output contexts of the query.
 *
 * When the query has a main output, ts/tag/tagprj outputs are ignored because
 * they cannot decide the number of output rows on their own.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);

  int64_t numOfRows = 0;
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

    if (!(mainOutputExists && auxiliary)) {
      SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
      if (pInfo != NULL && numOfRows < pInfo->numOfRes) {
        numOfRows = pInfo->numOfRes;
      }
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

266 267 268 269 270
/*
 * The number of results needs to be updated because the offset value has been updated.
 * Every output context except ts/tag/tagprj/ts_dummy gets its numOfRes set to
 * numOfRes, which must be smaller than the current value.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);

    int16_t fid = pRuntimeEnv->pCtx[idx].functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ ||
                         fid == TSDB_FUNC_TS_DUMMY);

    if (!auxiliary) {
      assert(pInfo->numOfRes > numOfRes);
      pInfo->numOfRes = numOfRes;
    }
  }
}

H
Haojun Liao 已提交
286
// Map a group index to a result id: 20000000 + groupIndex * 10000.
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t idBase = 20000000;
  const int32_t idStride = 10000;

  return idBase + (groupIndex * idStride);
}

/*
 * Tell whether the group-by expression contains a normal (non-tag) column.
 * Returns false for a NULL expression or one without group columns.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression,
 * looked up by column id in pQuery->colList. TSDB_DATA_TYPE_NULL is returned
 * when there is no such column or its id is not in the column list.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // placeholder id kept when no normal group-by column is found
  int32_t groupColId = -2;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (groupColId == pQuery->colList[c].colId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * True when the query outputs at least one tag_dummy/ts_dummy expression
 * together with at least one function flagged TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    tagSeen = false;
  int32_t selectivityCount = 0;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      tagSeen = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0) && tagSeen;
}

358 359 360 361 362 363 364 365 366 367 368
// True when every output expression is a projection (prj or tagprj); also true for zero outputs.
bool isProjQuery(SQuery *pQuery) {
  bool allProjection = true;

  for (int32_t idx = 0; allProjection && idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    allProjection = (fid == TSDB_FUNC_PRJ || fid == TSDB_FUNC_TAGPRJ);
  }

  return allProjection;
}

369
// True when the first output expression of the query is the ts_comp function.
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
370

371 372 373
/*
 * Apply the LIMIT clause to the rows produced so far.
 *
 * When a positive limit is set and total + rows exceeds it, rec.rows is cut
 * down to the remaining quota, the query is marked QUERY_COMPLETED and true is
 * returned. Otherwise nothing changes and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool exceeded = (pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit);
  if (!exceeded) {
    return false;
  }

  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

// True when any output expression other than ts is a top or bottom function.
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
/*
 * True when the query is a lone ts_comp expression, or when any output
 * expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirst = &pQuery->pSelectExpr[0];

  // ts_comp column required the tag value for join filter
  if (pQuery->numOfOutput == 1 && pFirst->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[idx].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

421 422 423 424 425 426 427 428
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
429
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
430
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
431 432
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
433 434
  } else {
    *pColStatis = NULL;
435
  }
436

H
Haojun Liao 已提交
437
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
438 439 440
    return false;
  }

441 442 443
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
444

445 446 447 448
  return true;
}

/*
 * Locate, or create during the master scan, the window result slot for a key.
 *
 * The key (pData, bytes) is looked up in pWindowResInfo->hashList. A hit makes
 * that slot current. On a miss outside the master scan NULL is returned; in the
 * master scan a new slot is appended, growing the result array first if it is
 * full (x1.25 above 10000 slots, x1.5 otherwise, and always by at least one slot).
 *
 * Errors are reported by longjmp() to pRuntimeEnv->env:
 *   TSDB_CODE_QRY_OUT_OF_MEMORY       - growing or initializing the array failed
 *   TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW - size exceeds MAX_INTERVAL_TIME_WINDOW
 *
 * @return the current window result, or NULL for an unknown key in a non-master scan
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // a capacity of 0 or 1 does not grow under the factors above; without
      // this the slot appended below would lie outside the array
      if (newCap <= pWindowResInfo->capacity) {
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCap * sizeof(SWindowResult)));
      if (t == NULL) {
        // the old array is still owned by pWindowResInfo and stays valid
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SWindowResult *)t;

      // account for the memory only after the allocation has succeeded
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCap - pWindowResInfo->capacity);

      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the correct time window according to the handled timestamp.
 *
 * The candidate window is taken from the stored prevSKey when no window is
 * active yet (curIndex == -1), otherwise from the current window result. If ts
 * falls outside the candidate, the window is recomputed: by truncating ts for
 * 'n'/'y' interval units, or by shifting the start key in whole sliding steps
 * for all other units. The end key is always inclusive, i.e. one tick before
 * the start of the next window.
 *
 * @param pWindowResInfo window result set providing the candidate window
 * @param ts             timestamp that the returned window has to cover
 * @param pQuery         query descriptor (interval, precision, window, order)
 * @return the time window; for ascending queries ekey is clamped to the query end key
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // taosTimeAdd() yields the start of the next window; the inclusive end is one tick earlier,
      // consistent with the recompute branch below
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SWindowResult* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // move the start backwards in whole sliding steps until it is not after ts
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // move the start forwards in whole sliding steps until the window reaches ts
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

/*
 * Assign a row in the disk-based result buffer to a window result that has none yet.
 *
 * The last page registered for sid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when sid has no page) a new page is
 * requested. The window result records the page id and the next free row id.
 *
 * @return 0 on success or when a position was already assigned, -1 when no page is available
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  // a position has been assigned before, nothing to do
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pageList = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(pageList) != 0) {
    SPageInfo* pLast = getLastPageInfo(pageList);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = (int32_t)(pPage->num++);

    assert(pWindowRes->pos.pageId >= 0);
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
602
                                       STimeWindow *win, bool masterscan, bool* newWind) {
603 604
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
605

606 607
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
608
  if (pWindowRes == NULL) {
609 610 611
    *newWind = false;

    return masterscan? -1:0;
612
  }
613

614
  *newWind = true;
H
Haojun Liao 已提交
615

616 617 618
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
619
    if (ret != TSDB_CODE_SUCCESS) {
620 621 622
      return -1;
    }
  }
623

624
  // set time window for current result
625
  pWindowRes->win = (*win);
626

H
Haojun Liao 已提交
627
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
628 629 630
  return TSDB_CODE_SUCCESS;
}

631
// Return the closed flag of the window result stored at slot (slot must be within [0, size)).
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
  return pRes->closed;
}

H
Haojun Liao 已提交
636
/*
 * Count the rows, starting at pos and walking in `order`, that belong to the
 * window ending at ekey.
 *
 * Ascending: searchFn runs on the rows from pos to the end of the block.
 * Descending: searchFn runs on rows [0, pos].
 * In both cases the row found by searchFn is included only when its timestamp
 * equals ekey. The result is asserted to be positive.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order == TSDB_ORDER_ASC) {
    int32_t hit = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (hit >= 0) {
      steps = (pData[hit + pos] == ekey) ? (hit + 1) : hit;
    }
  } else {
    int32_t hit = searchFn((char *)pData, pos + 1, ekey, order);
    if (hit >= 0) {
      steps = (pData[hit] == ekey) ? (pos - hit + 1) : (pos - hit);
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
667
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
668
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
669
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
670
    return pWindowResInfo->size;
671
  }
672

673
  // no qualified results exist, abort check
674
  int32_t numOfClosed = 0;
675

676
  if (pWindowResInfo->size == 0) {
677
    return pWindowResInfo->size;
678
  }
679

680
  // query completed
H
hjxilinx 已提交
681 682
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
683
    closeAllTimeWindow(pWindowResInfo);
684

685 686 687 688
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
689
    int64_t skey = TSKEY_INITIAL_VAL;
690

691 692
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
693
      if (pResult->closed) {
694
        numOfClosed += 1;
695 696
        continue;
      }
697

698
      TSKEY ekey = pResult->win.ekey;
699
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
700
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
701 702
        closeTimeWindow(pWindowResInfo, i);
      } else {
703
        skey = pResult->win.skey;
704 705 706
        break;
      }
    }
707

708
    // all windows are closed, set the last one to be the skey
709
    if (skey == TSKEY_INITIAL_VAL) {
710 711 712 713 714
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
715

716
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].win.skey;
717

718 719
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
720
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
721
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
722

723
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
724
    } else {
725
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
726
             numOfClosed);
727 728
    }
  }
729

730 731 732 733 734
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
735

736
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
737
  return numOfClosed;
738 739 740
}

/*
 * Count the rows of the data block, starting at startPos and walking in query
 * order, that fall into the time window ending at ekey.
 *
 * When the window reaches past the block edge in scan direction, all remaining
 * rows are counted; otherwise getForwardStepsInBlock() locates the boundary.
 * With updateLastKey set, the lastKey of the current table is moved one step
 * past the last counted timestamp (or past the block edge).
 *
 * @return number of rows in the window, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t rows  = -1;
  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* pTableInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey >= pDataBlockInfo->window.ekey) {
      // the window covers the rest of the block
      rows = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        pTableInfo->lastKey = pDataBlockInfo->window.ekey + step;
      }
    } else {
      rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        pTableInfo->lastKey = pPrimaryColumn[startPos + (rows - 1)] + step;
      }
    }
  } else {  // desc
    if (ekey <= pDataBlockInfo->window.skey) {
      // the window covers everything from startPos down to the first row
      rows = startPos + 1;
      if (updateLastKey) {
        pTableInfo->lastKey = pDataBlockInfo->window.skey + step;
      }
    } else {
      rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        pTableInfo->lastKey = pPrimaryColumn[startPos - (rows - 1)] + step;
      }
    }
  }

  assert(rows > 0);
  return rows;
}

H
Haojun Liao 已提交
780 781
/*
 * Apply every output function block-wise to `forwardStep` rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is closed.
 * For each output context the start timestamp, row count and start offset are
 * set (for descending order the offset is moved back to the lowest row index),
 * selectivity functions get the matching slice of the timestamp column, and the
 * function runs if functionNeedToExecute() allows it.
 *
 * @param closed      whether the target time window is already closed
 * @param pWin        time window whose skey becomes nStartQueryTimestamp
 * @param offset      position of the first row in scan order
 * @param forwardStep number of rows to process
 * @param tsCol       timestamp column of the block
 * @param numOfTotal  total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
      }

      // remember this context's own flag: contexts may differ, so restoring
      // all of them from pCtx[0] could switch pre-aggregation on where no
      // statistics were set
      bool hasPrev = pCtx[k].preAggVals.isSet;

      // not a whole block involved in query processing, statistics data can not be used
      // NOTE: the original value of isSet have been changed here
      if (hasPrev && forwardStep < numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }

      // restore it
      pCtx[k].preAggVals.isSet = hasPrev;
    }
  }
}

814
/*
 * Apply every output function to the single row at `offset`.
 * Nothing is done unless this is the master scan or the window is closed.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!(IS_MASTER_SCAN(pRuntimeEnv) || closed)) {
    return;
  }

  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pOne = &pCtxList[idx];
    pOne->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pOne, fid)) {
      aAggs[fid].xFunctionF(pOne, offset);
    }
  }
}

H
Haojun Liao 已提交
830 831
/*
 * Advance pNext to the next time window and locate the first row of the current block
 * that belongs to it.
 *
 * @param pRuntimeEnv     runtime environment; only its query object is read
 * @param pNext           in/out: current window on entry, next qualified window on exit
 * @param pDataBlockInfo  time range and number of rows of the current block
 * @param primaryKeys     timestamp column of the block
 *                        NOTE(review): dereferenced unconditionally below - confirm
 *                        callers never pass NULL here
 * @param searchFn        search routine used to find the start position by key
 * @param prevPosition    last row position of the previous window, or -1 if unknown
 * @return                start row position in the block, or -1 when the next window
 *                        lies outside of the block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the next time window is not covered by the current block
  if (ascending) {
    if (pNext->skey > pDataBlockInfo->window.ekey) {
      return -1;
    }
  } else if (pNext->ekey < pDataBlockInfo->window.skey) {
    return -1;
  }

  // the search key is the leading edge of the window, clamped to the start key of the query range
  TSKEY startKey;
  if (ascending) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  int32_t startPos;
  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling window (sliding == interval): the next window starts right after the previous row
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * If the row found is already beyond the window, the window covers no data (this may
   * happen when the time window is too small); move the window so that it contains the row.
   */
  TSKEY firstTs = primaryKeys[startPos];
  bool  beyondWindow = ascending ? (firstTs > pNext->ekey) : (firstTs < pNext->skey);
  if (!beyondWindow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    // units 'n' and 'y': re-derive the window from the timestamp itself
    pNext->skey = taosTimeTruncate(firstTs, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (ascending) {
    // jump forward by a whole number of sliding steps
    pNext->ekey += ((firstTs - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    // jump backward by a whole number of sliding steps
    pNext->skey -= ((pNext->skey - firstTs + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
891
/*
 * Return the trailing edge of pWindow in scan direction, clamped to the end key of the
 * query range: the window ekey (capped at pQuery->window.ekey) for ascending queries,
 * the window skey (floored at pQuery->window.ekey) otherwise.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
908 909
/*
 * Return the data buffer of the column whose id equals colId, or NULL when the
 * block holds no such column.
 * todo: replace the linear scan with a binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
923
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
924 925 926
  if (pDataBlock == NULL) {
    return NULL;
  }
927

H
Haojun Liao 已提交
928
  char *dataBlock = NULL;
H
Haojun Liao 已提交
929
  SQuery *pQuery = pRuntimeEnv->pQuery;
930

931
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
932
  if (functionId == TSDB_FUNC_ARITHM) {
933
    sas->pArithExpr = &pQuery->pSelectExpr[col];
934

935 936 937 938
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
939

H
Haojun Liao 已提交
940
    if (sas->data == NULL) {
H
Haojun Liao 已提交
941
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
942 943 944
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

945
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
946
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
947
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
948
      SColumnInfo *pColMsg = &pQuery->colList[i];
949

950 951 952 953 954 955 956 957
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
958

959
      assert(dataBlock != NULL);
960
      sas->data[i] = dataBlock;  // start from the offset
961
    }
962

963
  } else {  // other type of query function
964
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
965
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
966 967 968 969 970
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
971 972
    } else {
      dataBlock = NULL;
973 974
    }
  }
975

976 977 978 979
  return dataBlock;
}

/**
H
Haojun Liao 已提交
980
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
981 982
 * @param pRuntimeEnv
 * @param forwardStep
983
 * @param tsCols
984 985 986 987 988
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
989
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
990 991
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
992
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
993 994
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

995 996
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
997
  if (pDataBlock != NULL) {
998
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
999
    tsCols = (TSKEY *)(pColInfo->pData);
1000
  }
1001

1002
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1003
  if (sasArray == NULL) {
H
Haojun Liao 已提交
1004
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
1005 1006
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1007

H
Haojun Liao 已提交
1008
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1009
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1010
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1011
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1012
  }
1013

1014
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1015
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1016
    TSKEY ts = TSKEY_INITIAL_VAL;
1017

H
Haojun Liao 已提交
1018 1019 1020 1021 1022 1023 1024 1025
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1026
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1027 1028
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
1029
      taosTFree(sasArray);
H
hjxilinx 已提交
1030
      return;
1031
    }
1032

H
Haojun Liao 已提交
1033 1034 1035
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1036
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1037
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1038
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1039

1040
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1041
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1042
    }
1043

1044 1045
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1046

1047
    while (1) {
H
Haojun Liao 已提交
1048 1049
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1050 1051 1052
      if (startPos < 0) {
        break;
      }
1053

1054
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1055
      hasTimeWindow = false;
H
Haojun Liao 已提交
1056 1057
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1058 1059
        break;
      }
1060

1061 1062 1063 1064 1065
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1066
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1067

1068 1069
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1070
    }
1071

1072 1073 1074 1075 1076 1077 1078
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1079
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1080
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1081 1082 1083 1084 1085
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1086

1087 1088 1089 1090
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1091

S
Shengliang Guan 已提交
1092
    taosTFree(sasArray[i].data);
1093
  }
1094

S
Shengliang Guan 已提交
1095
  taosTFree(sasArray);
1096 1097 1098 1099 1100 1101
}

/*
 * Select (creating it if necessary) the result buffer of the group identified by the
 * group-by column value pData and make it the current output buffer.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        value of the group-by column for the current row
 * @param type         data type of the value
 * @param bytes        width of the value; for binary/nchar the length is read from the value
 * @return             TSDB_CODE_SUCCESS, or -1 when the value is null, the window result
 *                     cannot be obtained, or no result page can be added
 *
 * Float/double group keys abort the query via longjmp(TSDB_CODE_QRY_APP_ERROR).
 * Failure to allocate the copy of a binary/nchar key finalizes the query result and
 * leaves via longjmp(TSDB_CODE_QRY_OUT_OF_MEMORY).
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // the lookup key is the raw value; for var-length types it is the payload without the length header
  char* d = pData;
  int16_t len = bytes;
  if (type == TSDB_DATA_TYPE_BINARY||type == TSDB_DATA_TYPE_NCHAR) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float/binary/nchar columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true);
  if (pWindowRes == NULL) {
    return -1;
  }

  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    // NOTE(review): if doSetTimeWindowFromKey can return a result that already owns a key,
    // the previous allocation is overwritten here - confirm ownership of pWindowRes->key
    pWindowRes->key = malloc(varDataTLen(pData));
    if (pWindowRes->key == NULL) {
      // varDataCopy would write through the NULL pointer; abort the query instead
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    varDataCopy(pWindowRes->key, pData);
  } else {
    // integer-like keys are stored as a degenerate window [v, v]; other types keep -1
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
      default: break;
    }

    pWindowRes->win.skey = v;
    pWindowRes->win.ekey = v;
  }

  assert(pRuntimeEnv->windowResInfo.interval == 0);

  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1156
/*
 * Return the data buffer of the first non-tag group-by column that is present in the
 * data block, and report its type and width through `type` and `bytes`.
 *
 * `type`/`bytes` are written for every non-tag group-by column that is examined, even
 * when the block turns out not to contain it. Returns NULL when no group-by column is
 * found in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // position of the group-by column in the column list of the query
    int16_t schemaIdx = -1;
    int32_t groupColId = pGroupCol->colId;

    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pQuery->colList[c].colId != groupColId) {
        continue;
      }

      schemaIdx = c;
      break;
    }

    assert(schemaIdx >= 0 && schemaIdx < pQuery->numOfCols);

    *type = pQuery->colList[schemaIdx].type;
    *bytes = pQuery->colList[schemaIdx].bytes;

    /*
     * the index above is acquired from the first table of all qualified tables in this vnode
     * during the query prepare stage; the remaining tables may not have the required column in
     * cache actually. So the column is looked up again, by id, in the block itself.
     */
    int32_t blockCols = (int32_t)taosArrayGetSize(pDataBlock);
    for (int32_t c = 0; c < blockCols; ++c) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, c);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` with the current element of the timestamp buffer.
 *
 * @return TS_JOIN_TAG_NOT_EQUALS  the tag of the first function context differs from the element tag
 *         TS_JOIN_TS_NOT_EQUALS   the row has not reached the element timestamp yet in scan order
 *         TS_JOIN_TS_EQUAL        the timestamps match
 *
 * A row that is already past the element timestamp in scan order trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, &elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  // the input buffer of the first context is read as an array of timestamps
  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  bool notReached = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (notReached) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row is beyond the element in scan order, which is not expected
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function identified by functionId has to be executed on the
 * current input for the given context.
 *
 * Rules, evaluated in this order:
 *  1. the timestamp function always runs;
 *  2. completed results and the dummy tag/ts functions never run;
 *  3. first/first_dst run only for ascending queries;
 *  4. last/last_dst run only when the first parameter of the context equals the query order;
 *  5. everything else runs unless the current scan is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery      *pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  bool dummyFunc = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (pResInfo->complete || dummyFunc) {
    return false;
  }

  if (functionId == TSDB_FUNC_FIRST || functionId == TSDB_FUNC_FIRST_DST) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  if (functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_LAST_DST) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // none of the remaining functions is executed in the reverse (supplementary) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1263 1264
/*
 * Apply all output functions to the current data block one row at a time.
 *
 * Each row is first checked against the timestamp buffer (if any) and the column filters
 * (if any). A qualified row is then dispatched either to every time window that contains
 * it (interval query), or to the result buffer of its group (group by normal column) /
 * the default result buffer.
 *
 * After the scan the last key and the timestamp buffer cursor of the current table are
 * updated from the last visited row. The arithmetic support array allocated here is
 * released before returning; if it cannot be allocated the query result is finalized and
 * control leaves via longjmp.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  const bool isMasterScan = IS_MASTER_SCAN(pRuntimeEnv);
  const bool groupByColumn = pRuntimeEnv->groupbyNormalCol;

  // column 0 is used as the timestamp list only when it is typed as a timestamp
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY*) pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByColumn) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind the input of every output function
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
  }

  // bind the input column of every filter
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[k];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  // rows are visited starting at pQuery->pos, moving by `step` per row
  int32_t offset = -1;
  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, isMasterScan, &hasTimeWindow);
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {  // null data, too many state code
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // the row may belong to following windows as well; restore the active one afterwards
      STimeWindow nextWin = win;
      int32_t     activeIndex = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, isMasterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = activeIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByColumn) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  // the next key to read follows the last visited row (or the block edge when no timestamps are loaded)
  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else {
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

  pTableInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);

  // todo refactor: extract method
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      taosTFree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1428
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1429
  SQuery *pQuery = pRuntimeEnv->pQuery;
1430

H
hjxilinx 已提交
1431 1432
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1433

H
Haojun Liao 已提交
1434
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1435
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1436
  } else {
1437
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1438
  }
1439

1440
  // update the lastkey of current table
1441
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1442
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1443

1444
  // interval query with limit applied
1445
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1446
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1447 1448
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1449
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1450

1451 1452 1453 1454
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1455

1456 1457 1458
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1459

1460 1461 1462
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1463 1464 1465 1466 1467

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1468
    }
1469
  }
1470

1471
  return numOfRes;
1472 1473
}

H
Haojun Liao 已提交
1474
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1475
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1476

1477 1478
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1479

1480
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1481
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1482
  pCtx->aInputElemBuf = inputData;
1483

1484
  if (tpField != NULL) {
H
Haojun Liao 已提交
1485
    pCtx->preAggVals.isSet  = true;
1486 1487
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1488 1489 1490
  } else {
    pCtx->preAggVals.isSet = false;
  }
1491

H
Haojun Liao 已提交
1492 1493
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1494 1495
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1496

H
Haojun Liao 已提交
1497
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1498 1499
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1500

1501 1502
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1503
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1504
  }
1505

1506 1507 1508 1509 1510
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1511
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1512
    /*
H
Haojun Liao 已提交
1513
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1514 1515 1516 1517 1518 1519 1520 1521 1522 1523
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1524

1525 1526
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1527 1528 1529 1530 1531 1532
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1533 1534
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1535
    pInterpInfo->type = (int8_t)pQuery->fillType;
1536 1537
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1538

1539 1540 1541 1542
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1543 1544 1545
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1546 1547
      }
    }
H
Haojun Liao 已提交
1548 1549 1550
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1551
  }
1552

1553 1554 1555 1556 1557 1558
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1559
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1560 1561 1562
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1563
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1564 1565 1566 1567 1568 1569
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1570
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1571 1572
  SQuery* pQuery = pRuntimeEnv->pQuery;

1573
  if (isSelectivityWithTagsQuery(pQuery)) {
1574
    int32_t num = 0;
1575
    int16_t tagLen = 0;
1576

1577
    SQLFunctionCtx *p = NULL;
1578
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1579 1580 1581
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1582

1583
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1584
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1585

1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1599 1600 1601 1602 1603
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1604
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1605
    }
1606
  }
H
Haojun Liao 已提交
1607 1608

  return TSDB_CODE_SUCCESS;
1609 1610
}

H
Haojun Liao 已提交
1611 1612
/*
 * Hand out consecutive slices of buf to the result info entries: entry i receives
 * pQuery->pSelectExpr[i].interBytes bytes, starting where entry i-1 ended.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char *cursor = buf;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t interBytes = pQuery->pSelectExpr[idx].interBytes;

    setResultInfoBuf(pResultInfo + idx, interBytes, isStableQuery, cursor);
    cursor += interBytes;
  }
}

1621
/*
 * Allocate and initialize the per-output function contexts and the intermediate
 * result buffers of the runtime environment.
 *
 * @param pRuntimeEnv  runtime environment to set up; pRuntimeEnv->pQuery must be valid
 * @param order        order value stored in param[2] of top/bottom/diff contexts
 * @return TSDB_CODE_SUCCESS on success. On failure TSDB_CODE_QRY_OUT_OF_MEMORY is returned
 *         and everything allocated here (including the parameter variants already created
 *         for the contexts) is released.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // one SResultInfo per output, followed by the intermediate result buffer
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the "require null" request is moved from the column flag into the context
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type/bytes from the kind of column the expression refers to
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    // copy the function arguments into the context parameters
    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offset[i] is the accumulated output bytes of all preceding outputs
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffer starts right behind the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameter variants created above before dropping the contexts, otherwise
  // they are leaked. Contexts that were never initialized are zero-filled by calloc, so
  // their numOfParams is 0 and the inner loop does nothing for them.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release the resources held by the runtime environment: the time window info, the
 * function contexts (parameter variants, tag variant, tag context list), the result
 * info buffer, the fill info, the result buffer, both query handles and the ts buffer.
 * Does nothing when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t outIdx = 0; outIdx < pQuery->numOfOutput; ++outIdx) {
      SQLFunctionCtx *ctx = &pRuntimeEnv->pCtx[outIdx];

      for (int32_t paramIdx = 0; paramIdx < ctx->numOfParams; ++paramIdx) {
        tVariantDestroy(&ctx->param[paramIdx]);
      }

      tVariantDestroy(&ctx->tag);
      taosTFree(ctx->tagInfo.pTagCtxList);
    }

    // the result info is only released together with the contexts
    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1768
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1769

H
Haojun Liao 已提交
1770
// Mark the query as killed by storing the "query cancelled" code; IS_QUERY_KILLED tests for it.
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1771

H
Haojun Liao 已提交
1772 1773 1774
/*
 * Check whether the query is treated as a "fixed output" query.
 *
 * Interval queries never are; top/bottom queries and group-by-normal-column queries always
 * are. Otherwise the query qualifies as soon as one output function, other than the skipped
 * timestamp outputs, is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // ignore the ts_comp function: a leading projection of the primary timestamp column
    bool leadingTsProjection = (idx == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || tsFunction) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1804
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1805
static bool isPointInterpoQuery(SQuery *pQuery) {
1806
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1807
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1808
    if (functionID == TSDB_FUNC_INTERP) {
1809 1810 1811
      return true;
    }
  }
1812

1813 1814 1815 1816
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1817
static bool isSumAvgRateQuery(SQuery *pQuery) {
1818
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1819
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1820 1821 1822
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1823

1824 1825 1826 1827 1828
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1829

1830 1831 1832
  return false;
}

H
hjxilinx 已提交
1833
// Return true if any output expression is TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++idx;
  }

  return false;
}

H
hjxilinx 已提交
1844
/*
 * Decide whether a second scan in the opposite direction is required.
 *
 * A reverse scan is needed when
 *  - a first/first_dst function is evaluated by a query that is not ascending, or
 *  - a last/last_dst function carries an order argument that differs from the query order.
 *
 * Every output expression is inspected: a last() whose order matches the query order
 * does not end the search, since a later expression may still require the reverse scan.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG) {
      continue;
    }

    if ((functionId == TSDB_FUNC_FIRST || functionId == TSDB_FUNC_FIRST_DST) && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    if (functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_LAST_DST) {
      // the first argument of last/last_dst is compared against the query order
      int32_t order = (int32_t)pQuery->pSelectExpr[i].base.arg->argValue.i64;
      if (order != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
1863

H
Haojun Liao 已提交
1864 1865 1866 1867
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1868 1869
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1870 1871 1872
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1873 1874 1875 1876

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1877
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1878 1879 1880
      return false;
    }
  }
1881

H
hjxilinx 已提交
1882 1883 1884
  return true;
}

1885 1886
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1887
/*
 * Compute the time window that key falls into.
 *
 * win->skey is key truncated by taosTimeTruncate according to the query interval and
 * precision; win->ekey is the last timestamp of that window.
 *
 * @param pQuery    query providing interval, intervalUnit, sliding and precision
 * @param key       timestamp to align; asserted to be within [keyFirst, keyLast]
 * @param keyFirst  lower bound of the query range
 * @param keyLast   upper bound of the query range
 * @param win       output window
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * if the realSkey > INT64_MAX - pQuery->interval.interval, the query duration between
   * realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  // 'n' and 'y' intervals go through taosTimeAdd instead of a plain addition
  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * It is 0 for top/bottom queries and group-by-normal-column queries. Otherwise it is 1 only
 * when at least one non-timestamp output exists and every such output is a multi-output
 * function.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[funcId].nStatus);
    if (!multiOutput) {
      break;  // one single-output function is enough to disable the check
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1931
bool colIdCheck(SQuery *pQuery) {
1932 1933
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1934
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1935
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1936 1937 1938
      return false;
    }
  }
1939

1940 1941 1942 1943 1944 1945
  return true;
}

// Return true if every output function, apart from the ts/ts_dummy/tag/tag_dummy helpers,
// is either functId or functIdDst.
// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t current = pQuery->pSelectExpr[idx].base.functionId;

    bool isHelper = (current == TSDB_FUNC_TS) || (current == TSDB_FUNC_TS_DUMMY) || (current == TSDB_FUNC_TAG) ||
                    (current == TSDB_FUNC_TAG_DUMMY);
    if (isHelper) {
      continue;
    }

    bool matches = (current == functId) || (current == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

// True if the query consists of first/first_dst functions only (helper outputs aside).
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// True if the query consists of last/last_dst functions only (helper outputs aside).
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1966
// todo refactor, add iterator
1967 1968
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1969
  for(int32_t i = 0; i < t; ++i) {
1970
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1971 1972 1973

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1974
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1975

1976 1977 1978 1979
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1980 1981 1982 1983
    }
  }
}

1984
/*
 * Adjust the scan order (and, where needed, swap the query time range) according to the
 * kind of query:
 *  - last_row query: forced to ascending order
 *  - group by normal column in descending order: forced to ascending order
 *  - point interpolation query without interval: forced to ascending order
 *  - first-only / last-only query, either without interval or as an interval query on a
 *    super table: forced to ascending / descending order respectively
 *
 * Whenever the range is swapped for the group-by and first/last cases, the lastKey of the
 * tables is adjusted through doExchangeTimeWindow.
 *
 * @param pQInfo       query info whose runtimeEnv.pQuery is modified
 * @param pQueryMsg    not used by this function
 * @param stableQuery  true if the query is a super table query
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // in case of point-interpolation query, use asc order scan
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // NOTE: all log statements below print pQInfo directly. GET_QINFO_ADDR() derives the QInfo
  // address from a runtime env pointer, so applying it to pQuery yields an unrelated address.
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Initial number of pages:
 *  - 128 for a group-by-normal-column query
 *  - for an interval query, the number of tables in tableqinfoGroupInfo, but at least 16
 *  - 1 otherwise
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t numOfPages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(numOfPages > 0);
  return numOfPages;
}

2094 2095
/*
 * Compute the row size and page size of the intermediate result buffer.
 *
 * @param pRuntimeEnv  runtime environment; numOfRowsPerPage is updated
 * @param ps           output: page size, DEFAULT_INTERN_BUF_PAGE_SIZE doubled until a page
 *                     (minus the tFilePage header) holds at least 4 rows
 * @param rowsize      output: pQuery->rowSize scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  const int32_t minRowsPerPage = 4;
  const int32_t headerBytes = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // one page contains at least minRowsPerPage rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * minRowsPerPage) > (*ps) - headerBytes; *ps = (*ps << 1u)) {
    // keep doubling the page size
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2111
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2112

H
Haojun Liao 已提交
2113 2114 2115 2116
/*
 * Use the block statistics to decide whether the data block has to be loaded.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDataStatis  per-column statistics of the block, may be NULL
 * @param pCtx         function contexts, handed to the top/bottom block filter
 * @param numOfRows    number of rows in the block
 * @return true if the block must be loaded, false if it can be discarded
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // nothing to pre-filter with: no statistics, or neither column filters nor a top/bottom query
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
    return true;
  }

  for (int32_t colIdx = 0; colIdx < pQuery->numOfFilterCols; ++colIdx) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[colIdx];

    // locate the statistics entry of the filtered column
    int32_t statIdx = -1;
    for (int32_t s = 0; s < pQuery->numOfCols; ++s) {
      if (pDataStatis[s].colId == pFilter->info.colId) {
        statIdx = s;
        break;
      }
    }

    // no statistics data, load the true data block
    if (statIdx == -1) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pFilter->info.type)) {
      return true;
    }

    SDataStatis *pStat = &pDataStatis[statIdx];

    // all data in current column are NULL, no need to check its boundary value
    if (pStat->numOfNull == numOfRows) {
      // if isNULL query exists, load the null data column
      for (int32_t f = 0; f < pFilter->numOfFilters; ++f) {
        if (pFilter->pFilters[f].fp == isNull_filter) {
          return true;
        }
      }

      continue;
    }

    // float columns are filtered on float copies of the min/max, which are read as double
    float floatMin = 0;
    float floatMax = 0;
    char *pMin = (char *)&pStat->min;
    char *pMax = (char *)&pStat->max;

    if (pFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      floatMin = (float)(*(double *)(&pStat->min));
      floatMax = (float)(*(double *)(&pStat->max));
      pMin = (char *)&floatMin;
      pMax = (char *)&floatMax;
    }

    for (int32_t f = 0; f < pFilter->numOfFilters; ++f) {
      if (pFilter->pFilters[f].fp(&pFilter->pFilters[f], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    // the first top/bottom output decides
    for (int32_t outIdx = 0; outIdx < pQuery->numOfOutput; ++outIdx) {
      int32_t funcId = pQuery->pSelectExpr[outIdx].base.functionId;
      if (funcId != TSDB_FUNC_TOP && funcId != TSDB_FUNC_BOTTOM) {
        continue;
      }

      return topbot_datablock_filter(&pCtx[outIdx], funcId, (char *)&pDataStatis[outIdx].min, (char *)&pDataStatis[outIdx].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
2187 2188 2189 2190 2191 2192 2193 2194
/*
 * Check whether a time window boundary falls inside the data block, i.e. whether the block
 * spans more than one time window. Windows are enumerated with GET_NEXT_TIMEWINDOW,
 * starting from the window aligned to the block start (ascending query) or to the block end
 * (descending query).
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow tw = {0};

  TSKEY rangeStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY rangeEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, rangeStart, rangeEnd, &tw);
    assert(tw.ekey >= pBlockInfo->window.skey);

    // the first window ends before the block does
    if (tw.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &tw);
      if (tw.skey > pBlockInfo->window.ekey) {
        return false;
      }

      assert(tw.ekey > pBlockInfo->window.ekey);
      if (tw.skey <= pBlockInfo->window.ekey && tw.skey > pBlockInfo->window.skey) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, rangeStart, rangeEnd, &tw);
  assert(tw.skey <= pBlockInfo->window.ekey);

  // the first window starts after the block does
  if (tw.skey > pBlockInfo->window.skey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &tw);
    if (tw.ekey < pBlockInfo->window.skey) {
      return false;
    }

    assert(tw.skey < pBlockInfo->window.skey);
    if (tw.ekey < pBlockInfo->window.ekey && tw.ekey >= pBlockInfo->window.skey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2236
/*
 * Decide how much of the current data block is required and load exactly that.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pWindowResInfo  window result info used to position the output buffer of interval queries
 * @param pQueryHandle    tsdb query handle positioned on the block
 * @param pBlockInfo      information about the current block
 * @param pStatis         output: block statistics, if they were retrieved
 * @param pDataBlock      output: block data, if it was loaded
 * @param status          output: BLK_DATA_NO_NEEDED, BLK_DATA_STATIS_NEEDED, BLK_DATA_ALL_NEEDED
 *                        or BLK_DATA_DISCARD
 * @return TSDB_CODE_SUCCESS, or terrno if the block data could not be retrieved
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  // column filters or a ts buffer always require the complete block
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every output function; stop once everything is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // report a failed load the same way the BLK_DATA_ALL_NEEDED path does, instead of
      // leaving the caller with neither statistics nor data
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2321
/*
 * Binary search in a list of timestamps.
 *
 * @param pValue  buffer that is read as an array of TSKEY
 * @param num     number of entries in the array
 * @param key     timestamp to look for
 * @param order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return the position of key if it is found. If it is not found: for TSDB_ORDER_DESC the
 *         position of the closest entry below key (-1 if key is smaller than the first
 *         entry examined), for TSDB_ORDER_ASC the position of the closest entry above key
 *         (-1 if that position would be past the end). Always -1 if num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t low = 0;
  int32_t high = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[high]) return high;
      if (key == keys[low]) return low;
      if (key < keys[low]) return low - 1;

      int32_t mid = ((high - low + 1) >> 1) + low;

      if (key < keys[mid]) {
        high = mid - 1;
      } else if (key > keys[mid]) {
        low = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[low]) return low;
    if (key == keys[high]) return high;

    if (key > keys[high]) {
      high = high + 1;
      return (high >= num) ? -1 : high;
    }

    int32_t mid = ((high - low + 1) >> 1) + low;

    if (key < keys[mid]) {
      high = mid - 1;
    } else if (key > keys[mid]) {
      low = mid + 1;
    } else {
      return mid;
    }
  }
}

2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396
/*
 * Resize every output column buffer so that it can hold `capacity` rows.
 * Nothing is done when the current capacity is already larger than the
 * requested one. On allocation failure the query is aborted via longjmp with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pQuery->rec.capacity > capacity) {
    return;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t width = pQuery->pSelectExpr[col].bytes;
    assert(width > 0 && capacity > 0);

    // room for the page header plus `capacity` rows of this column
    char *resized = realloc(pQuery->sdata[col], width * capacity + sizeof(tFilePage));
    if (resized == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[col] = (tFilePage *)resized;

    // the function context writes its results at the start of the (possibly moved) buffer
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2412 2413 2414
/*
 * Grow the output buffers so the rows of the given data block fit behind the
 * rows already produced. Only applies when the query is none of: interval
 * query, group-by-normal-column query, fixed-output query, ts-comp query.
 * On allocation failure the query is aborted via longjmp with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) || isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;

  // enough free rows left for the whole block
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;
  }

  int32_t freeRows = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - freeRows));

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t width = pQuery->pSelectExpr[col].bytes;
    assert(width > 0 && newSize > 0);

    char *resized = realloc(pQuery->sdata[col], width * newSize + sizeof(tFilePage));
    if (resized == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that already hold results
    memset(resized + sizeof(tFilePage) + width * pRec->rows, 0, (size_t)((newSize - pRec->rows) * width));
    pQuery->sdata[col] = (tFilePage *)resized;

    // continue writing right after the existing rows
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data + pRec->rows * width;

    // top/bottom/diff additionally reference the output position of the first column
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471
/*
 * For an interval query whose window info has not been initialized yet
 * (prevSKey == TSKEY_INITIAL_VAL), compute the first aligned time window from
 * the given data block and record it in the window result info.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->windowResInfo.prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow     win = TSWINDOW_INITIALIZER;
  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
    pInfo->startTime = win.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);
    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = win.skey;
}

2472 2473
/*
 * Iterate over the data blocks of the current scan (master or secondary query
 * handle) and apply the query functions on each block until the handle is
 * exhausted, a block fails to load, or the query status reports a full result
 * buffer / completion.
 *
 * Aborts through longjmp when the query has been killed or when terrno is set
 * after the loop. Always returns 0 otherwise.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *summary = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  } else {
    pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQueryHandle, &blockInfo, &pStatis,
                                         &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // block is skipped: move the last key just past it in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    summary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2552
/*
 * Replace the content of `tag` with the value of one tag column of pTable.
 *
 * tagColId == TSDB_TBNAME_COLUMN_INDEX selects the table name, which is stored
 * as a binary value. For any other column the value is fetched with
 * tsdbGetTableTagVal(); a missing or NULL value turns `tag` into
 * TSDB_DATA_TYPE_NULL. The `tsdb` parameter is not used by this function.
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char* name = tsdbGetTableName(pTable);
    assert(name != NULL);

    tVariantCreateFromBinary(tag, varDataVal(name), varDataLen(name), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
  if (val == NULL || isNull(val, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    // variable-length value: payload and length are taken through the varData accessors
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
  } else {
    tVariantCreateFromBinary(tag, val, bytes, type);
  }
}

2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596
/*
 * Linear lookup of a tag column descriptor by column id.
 * Returns a pointer into pTagColList, or NULL if no entry has the given id.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t idx = 0;
  while (idx < numOfTags) {
    SColumnInfo *pCol = &pTagColList[idx];
    if (pCol->colId == colId) {
      return pCol;
    }

    idx += 1;
  }

  return NULL;
}

2597
/*
 * Load the tag values of pTable into the function contexts of the query.
 *
 * - A single-output TSDB_FUNC_TS_COMP query only needs the tag column named by
 *   the first expression argument; it is stored in pCtx[0].tag.
 * - Otherwise every output expression flagged as a tag column gets its tag
 *   value, and, when a ts buffer is present and the first expression is a
 *   ts/prj function on the primary timestamp column, the join tag named by
 *   its argument is stored in pCtx[0].tag as well.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
    assert(pColInfo != NULL);  // the lookup returns NULL for an unknown column id

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
  } else {
    // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];

      // ts_comp column required the tag value for join filter
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        continue;
      }

      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }

    // set the join tag for first column
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
        pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      assert(pFuncMsg->numOfParams == 1);

      int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
      SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
      assert(pColInfo != NULL);  // the lookup returns NULL for an unknown column id

      doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);

      // log the join tag with the format matching its type: string tags through pz, everything else through i64Key.
      // Both logs belong to the join branch only; the tag is not (re)loaded outside of it.
      if (pRuntimeEnv->pCtx[0].tag.nType == TSDB_DATA_TYPE_BINARY || pRuntimeEnv->pCtx[0].tag.nType == TSDB_DATA_TYPE_NCHAR) {
        qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo, pExprInfo->base.arg->argValue.i64,
               pRuntimeEnv->pCtx[0].tag.pz);
      } else {
        qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
               pRuntimeEnv->pCtx[0].tag.i64Key);
      }
    }
  }
}

/*
 * Feed one window result into the merge stage of every output function.
 *
 * mergeFlag == false starts a new output row: each context's output position
 * is advanced by one row, its result info is reset and the function is
 * re-initialized. mergeFlag == true keeps accumulating into the current row.
 * In both cases the window result becomes the input of distMergeFunc.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  // first pass: prepare input/output positions of every function context
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pCtx[col].aOutputBuf += pCtx[col].outputBytes;
      pCtx[col].currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pCtx[col].resultInfo);
      aAggs[functionId].init(&pCtx[col]);
    }

    pCtx[col].hasNull = true;
    pCtx[col].nStartQueryTimestamp = timestamp;
    pCtx[col].aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes, page);

    // in case of tag column, the tag information should be extracted from input buffer
    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    tVariantDestroy(&pCtx[col].tag);

    int32_t type = pCtx[col].outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCtx[col].tag, varDataVal(pCtx[col].aInputElemBuf), varDataLen(pCtx[col].aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pCtx[col].tag, pCtx[col].aInputElemBuf, pCtx[col].inputBytes, pCtx[col].inputType);
    }
  }

  // second pass: run the merge function of every output except dummy tags
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtx[col]);
    }
  }
}

2689
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2758
/*
 * Debug helper: dump `numOfRows` rows of the intermediate result buffers to
 * stdout, one line per row, columns separated by tabs. pdata holds one page
 * per output column; the cell of row j in column i is read at offset
 * bytes(i) * j inside that page. Column types without a case print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      int32_t    type = pExpr->type;

      if (type == TSDB_DATA_TYPE_BINARY) {
        printBinaryData(pExpr->base.functionId, pdata[col]->data + pExpr->bytes * row, type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)(pdata[col]->data + pExpr->bytes * row));
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)(pdata[col]->data + pExpr->bytes * row));
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)(pdata[col]->data + pExpr->bytes * row));
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)(pdata[col]->data + pExpr->bytes * row));
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2793 2794 2795
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2796 2797 2798 2799 2800
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2801

2802 2803
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2804

2805 2806
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2807

2808 2809 2810 2811
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2812

2813 2814 2815 2816
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2817

H
hjxilinx 已提交
2818
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2819
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2820
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2821

H
Haojun Liao 已提交
2822
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2823
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2824

H
hjxilinx 已提交
2825
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2826
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2827
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2828

H
Haojun Liao 已提交
2829
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2830
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2831

2832 2833 2834
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2835

2836 2837 2838
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2839
/*
 * Merge the results of table groups, starting at pQInfo->groupIndex, until one
 * group yields at least one result page or all groups have been processed.
 * Marks the super-table query as over once the last group is consumed and no
 * unread page remains.
 *
 * Returns -1 if merging a group failed, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();

  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pTables);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo* pGroupRes = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pGroupRes->pos.pageId == pGroupRes->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the disk-based result buffer into the query
 * output buffers (pQuery->sdata), at most pQuery->rec.capacity rows per call.
 *
 * The read position (page, row) is kept in pQInfo->groupResInfo.pos so that a
 * following call continues where this one stopped. When all pages of the
 * current group have been returned, the next group is merged first; if no
 * group is left the super-table query is marked as over.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pos.pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->pos.rowId < pData->num);

    // first unread row of this page; non-zero when the previous call stopped inside the page
    int32_t startRow = (int32_t)pGroupResInfo->pos.rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer gets full inside this page: remember where to continue
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->pos.rowId += numOfCopiedRows;
      done = true;
    } else {
      // only the rows that have not been returned yet are copied, not the whole page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pos.pageId += 1;
      pGroupResInfo->pos.rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page; skip the rows already consumed
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + startRow * bytes;
      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2947
/*
 * Number of result rows held by a window result: the numOfRes of the first
 * output column that is not a ts/tag/tagprj function and has a positive
 * count. Returns 0 if there is no such column.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool isAuxiliary = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!isAuxiliary) {
      SResultInfo *pResultInfo = &pWindowRes->resultInfo[col];
      assert(pResultInfo != NULL);

      if (pResultInfo->numOfRes > 0) {
        return pResultInfo->numOfRes;
      }
    }
  }

  return 0;
}

2970
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
2971
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
2972
  SQuery *          pQuery = pRuntimeEnv->pQuery;
2973

2974
  size_t size = taosArrayGetSize(pGroup);
2975
  tFilePage **buffer = pQuery->sdata;
2976

H
Haojun Liao 已提交
2977
  int32_t *posList = calloc(size, sizeof(int32_t));
H
hjxilinx 已提交
2978
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);
2979

2980
  if (pTableList == NULL || posList == NULL) {
S
Shengliang Guan 已提交
2981 2982
    taosTFree(posList);
    taosTFree(pTableList);
2983 2984

    qError("QInfo:%p failed alloc memory", pQInfo);
H
Haojun Liao 已提交
2985
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
2986 2987
  }

2988
  // todo opt for the case of one table per group
2989
  int32_t numOfTables = 0;
H
Haojun Liao 已提交
2990 2991 2992
  SIDList pageList = NULL;
  int32_t tid = -1;

2993
  for (int32_t i = 0; i < size; ++i) {
2994
    STableQueryInfo *item = taosArrayGetP(pGroup, i);
2995

H
Haojun Liao 已提交
2996
    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
H
Haojun Liao 已提交
2997
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
H
Haojun Liao 已提交
2998
      pTableList[numOfTables++] = item;
2999 3000
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
3001 3002
    }
  }
3003

H
Haojun Liao 已提交
3004
  // there is no data in current group
3005
  if (numOfTables == 0) {
S
Shengliang Guan 已提交
3006 3007
    taosTFree(posList);
    taosTFree(pTableList);
3008
    return 0;
H
Haojun Liao 已提交
3009
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
H
Haojun Liao 已提交
3010 3011 3012 3013 3014
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

3015
    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
H
Haojun Liao 已提交
3016 3017 3018 3019 3020
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    return pGroupResInfo->numOfDataPages;
3021
  }
3022

3023
  SCompSupporter cs = {pTableList, posList, pQInfo};
3024

3025
  SLoserTreeInfo *pTree = NULL;
3026
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
3027

3028
  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
H
Haojun Liao 已提交
3029 3030 3031 3032
  if (pResultInfo == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

H
Haojun Liao 已提交
3033
  char* buf = calloc(1, pRuntimeEnv->interBufSize);
H
Haojun Liao 已提交
3034 3035 3036 3037
  if (buf == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

H
Haojun Liao 已提交
3038
  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
3039
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
3040

H
Haojun Liao 已提交
3041 3042
  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

H
Haojun Liao 已提交
3043
  // todo add windowRes iterator
3044 3045
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();
3046

3047
  while (1) {
3048 3049
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);
H
Haojun Liao 已提交
3050 3051 3052 3053 3054 3055 3056

      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pTree);
      taosTFree(pResultInfo);
      taosTFree(buf);

3057 3058 3059
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

3060
    int32_t pos = pTree->pNode[0].index;
3061

H
hjxilinx 已提交
3062
    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
H
Haojun Liao 已提交
3063
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
H
Haojun Liao 已提交
3064
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
3065

H
Haojun Liao 已提交
3066
    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
3067
    TSKEY ts = GET_INT64_VAL(b);
3068

3069
    assert(ts == pWindowRes->win.skey);
H
Haojun Liao 已提交
3070
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
3071 3072
    if (num <= 0) {
      cs.position[pos] += 1;
3073

3074 3075
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;
3076

3077
        // all input sources are exhausted
3078
        if (--numOfTables == 0) {
3079 3080 3081 3082 3083 3084 3085
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
3086
        if (buffer[0]->num == pQuery->rec.capacity) {
H
Haojun Liao 已提交
3087
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
3088 3089
            return -1;
          }
3090

3091 3092
          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }
3093

3094
        doMerge(pRuntimeEnv, ts, pWindowRes, false);
3095
        buffer[0]->num += 1;
3096
      }
3097

3098
      lastTimestamp = ts;
3099

H
Haojun Liao 已提交
3100 3101 3102
      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

3103 3104 3105
      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;
3106

3107
        // all input sources are exhausted
3108
        if (--numOfTables == 0) {
3109 3110
          break;
        }
H
Haojun Liao 已提交
3111 3112 3113 3114 3115 3116
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
3117 3118
      }
    }
3119

3120 3121
    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }
3122

3123
  if (buffer[0]->num != 0) {  // there are data in buffer
H
Haojun Liao 已提交
3124
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
S
slguan 已提交
3125
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
3126

S
Shengliang Guan 已提交
3127 3128 3129 3130
      taosTFree(pTree);
      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pResultInfo);
3131

3132 3133 3134
      return -1;
    }
  }
3135

3136 3137 3138
  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
3139
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
3140
#endif
3141

3142
  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);
3143

S
Shengliang Guan 已提交
3144 3145 3146
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);
3147

S
Shengliang Guan 已提交
3148 3149
  taosTFree(pResultInfo);
  taosTFree(buf);
H
Haojun Liao 已提交
3150 3151

  return pQInfo->groupResInfo.numOfDataPages;
3152 3153
}

H
Haojun Liao 已提交
3154 3155
/**
 * Copy the rows currently held in pQuery->sdata[] into newly obtained pages of the
 * result buffer, at most pResultBuf->numOfRowsPerPage rows per page.
 *
 * @param pRuntimeEnv    runtime environment; provides the query, the result buffer, the width of
 *                       each output column (pCtx[i].outputBytes) and the in-page column offsets.
 * @param pGroupResInfo  group result descriptor; groupId is passed to getNewDataBuf() and
 *                       numOfDataPages is incremented once per page written.
 * @return TSDB_CODE_SUCCESS when every row has been copied, TSDB_CODE_QRY_OUT_OF_MEMORY when no
 *         output page could be obtained.
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;  // filled in by getNewDataBuf() with the id of the page it hands out
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  // the row count of the first column is used as the row count of the whole batch
  int32_t remain = (int32_t) pQuery->sdata[0]->num;
  int32_t offset = 0;   // number of rows already flushed

  while (remain > 0) {
    int32_t rows = (remain > capacity)? capacity:remain;
    assert(rows > 0);

    // get the output buffer page
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    if (buf == NULL) {
      // Callers already test the return value, so report the failure instead of dereferencing
      // a null page. NOTE(review): whether getNewDataBuf() can return NULL is not visible
      // from this file -- confirm against its implementation.
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = rows;

    // copy the data column by column into the destination page
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      char* output = buf->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage;
      char* src = ((char *) pQuery->sdata[i]->data) + offset * bytes;
      memcpy(output, src, (size_t)(buf->num * bytes));
    }

    offset += rows;
    remain -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/**
 * Rewind the merge output state for every output column.
 *
 * Each function context is re-attached to its pResultInfo entry, its size is set to 1 and its
 * start offset to 0, and the row counter of the matching pQuery->sdata[] buffer is cleared.
 *
 * @param pQuery       query whose sdata[] buffers are reset
 * @param pCtx         array of function contexts, one per output column
 * @param pResultInfo  array of result info entries, one per output column
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pCtx[col];

    // The output pointer is deliberately placed one row *before* the buffer start.
    // NOTE(review): presumably the merge step advances the pointer before writing -- confirm.
    pColCtx->aOutputBuf = pQuery->sdata[col]->data - pColCtx->outputBytes;

    pColCtx->size = 1;
    pColCtx->startOffset = 0;
    pColCtx->resultInfo = &pResultInfo[col];

    pQuery->sdata[col]->num = 0;
  }
}

3203 3204 3205 3206
/**
 * Turn the per-table query range around so the table can be scanned in the opposite direction.
 *
 * The range end is moved to (lastKey + step) unless lastKey still equals the range start, then
 * start/end are swapped, lastKey is reset to the new start, the cursor order is switched and the
 * window index is placed on the last window result. A NULL pTableQueryInfo is ignored.
 *
 * @param pQuery           query; only order.order is read (already switched by the caller)
 * @param pTableQueryInfo  per-table state to update, may be NULL
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  STimeWindow *pWin = &pTableQueryInfo->win;

  // order has changed already
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the relation between win.ekey and (lastKey + step) for asc/desc queries
  // with an assertion here.

  // lastKey == skey means the previous scan produced nothing: leave the range end untouched
  if (pTableQueryInfo->lastKey != pWin->skey) {
    pWin->ekey = pTableQueryInfo->lastKey + direction;
  }

  SWAP(pWin->skey, pWin->ekey, TSKEY);
  pTableQueryInfo->lastKey = pWin->skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/**
 * For every closed window result, mark which output functions still need data from the
 * reverse scan.
 *
 * first/first_dst under ascending order and last/last_dst under descending order get
 * complete = false; every other function except ts and tag gets complete = true; ts and tag are
 * left unchanged. Windows that are not closed are skipped.
 *
 * @param pQInfo          query info, used to reach the query definition
 * @param pWindowResInfo  window results to update
 * @param order           scan order to compare against TSDB_ORDER_ASC / TSDB_ORDER_DESC
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!getTimeWindowResStatus(pWindowResInfo, w)) {
      continue;  // window is still open
    }

    SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);

    // open/close the specified query for each group result
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t functId = pQuery->pSelectExpr[k].base.functionId;

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pWinRes->resultInfo[k].complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pWinRes->resultInfo[k].complete = true;
      }
    }
  }
}

3259 3260
/**
 * Decide, before a reverse scan, which output functions are still allowed to consume data.
 *
 * Group-by-normal-column and interval queries are delegated to disableFuncInReverseScanImpl()
 * on the runtime window results. Otherwise the flag is set directly on each function context's
 * resultInfo, using the same rule: first/first_dst (ascending) and last/last_dst (descending)
 * get complete = false, everything else except ts/tag gets complete = true.
 *
 * @param pQInfo  query info owning the runtime environment
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // for simple result of table query, todo refactor
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t functId = pQuery->pSelectExpr[k].base.functionId;

    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];
    if (pFuncCtx->resultInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pFuncCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pFuncCtx->resultInfo->complete = true;
    }
  }
}

/**
 * Prepare every table of every table group for the reverse scan.
 *
 * Each STableQueryInfo is passed through updateTableQueryInfoForReverseScan(), and the resulting
 * lastKey is copied into the entry at the same position of pQInfo->tableGroupInfo.pGroupList.
 *
 * @param pQInfo  query info holding both the table query info groups and the table key groups
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pInfoGroup = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pInfoGroup);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pInfo = taosArrayGetP(pInfoGroup, k);
      updateTableQueryInfoForReverseScan(pQuery, pInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the
      // tsdbQueryHandle and decide the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyGroup, k);
      pKeyInfo->lastKey = pInfo->lastKey;

      // both lists are expected to describe the same table at the same position
      assert(pInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3310
/**
 * Apply SWITCH_ORDER to the order field of every output function context.
 *
 * @param pRuntimeEnv  runtime environment holding the query and its function contexts
 */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t k = 0; k < numOfOutput; ++k) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[k].order);
  }
}

H
Haojun Liao 已提交
3317
/**
 * Allocate and initialize the resultInfo array of one window result row.
 *
 * A single zeroed allocation holds numOfOutput SResultInfo entries followed by interBufSize
 * bytes; the trailing bytes are handed to setWindowResultInfo() as the intermediate buffer.
 * The row position is set to {-1, -1}.
 *
 * @param pQuery         query definition (numOfOutput is read)
 * @param pResultRow     row whose resultInfo and pos are initialized
 * @param isSTableQuery  forwarded to setWindowResultInfo()
 * @param interBufSize   number of extra bytes reserved behind the SResultInfo array
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when the allocation fails
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer starts right behind the SResultInfo array
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/**
 * Point every function context back at the start of its pQuery->sdata[] buffer, reset its
 * result info, zero the buffer, and finally run initCtxOutputBuf().
 *
 * @param pRuntimeEnv  runtime environment holding the query, contexts and result infos
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];
    pFuncCtx->aOutputBuf = pQuery->sdata[k]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(&pRuntimeEnv->resultInfo[k]);
    pFuncCtx->resultInfo = &pRuntimeEnv->resultInfo[k];

    // top/bottom/diff write their timestamps through the output buffer of the first column
    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    switch (functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pFuncCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[k]->data, 0, (size_t)(pQuery->pSelectExpr[k].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/**
 * Advance the output position of every function context by `output` rows and reset its
 * result info.
 *
 * Only functions flagged IS_OUTER_FORWARD in aAggs[] have aOutputBuf moved; top/bottom also
 * have ptsOutputBuf moved by TSDB_KEYSIZE * output. The diff function must not appear here
 * (asserted).
 *
 * @param pRuntimeEnv  runtime environment holding the query and contexts
 * @param output       number of rows to move forward
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pFuncCtx->aOutputBuf += pFuncCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the timestamp column of the output is written by the
     * top/bottom routine itself through ptsOutputBuf, so that pointer has to move as well.
     * The diff function is handled in the multi-output function path.
     */
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pFuncCtx->ptsOutputBuf = (char*)pFuncCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pFuncCtx->resultInfo);
  }
}

/**
 * Reset currentStage of every function context to 0 and run the function's init() callback
 * for each context whose result info is not yet marked initialized.
 *
 * @param pRuntimeEnv  runtime environment holding the query and contexts
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];
    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

    pFuncCtx->currentStage = 0;

    SResultInfo* pInfo = GET_RES_INFO(pFuncCtx);
    if (!pInfo->initialized) {
      aAggs[functionId].init(pFuncCtx);
    }
  }
}

3405
/**
 * Apply the remaining limit.offset to the rows currently held in the output buffers.
 *
 * - Nothing happens when there are no rows or the offset is 0.
 * - When the offset covers all rows, the rows are discarded, the offset is reduced by that
 *   amount, the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 * - Otherwise the first `offset` rows are dropped by moving the remaining rows to the front
 *   of each column buffer, the offset becomes 0 and the result counters are updated.
 *
 * @param pRuntimeEnv  runtime environment holding the query and contexts
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // the whole batch is consumed by the offset
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only the leading part of the batch is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    int32_t bytes = pFuncCtx->outputBytes;
    char   *colData = (char*) pQuery->sdata[k]->data;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[k]->data, colData + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pFuncCtx->aOutputBuf = colData + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pFuncCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/**
 * Update pQuery->status.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed into the status
 * after QUERY_NOT_COMPLETED has been cleared, because that flag is not compatible with any
 * other status.
 *
 * @param pQuery  query whose status is modified
 * @param status  status value to set or add
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/**
 * Run the xNextStep() callback of every output function (ts excluded) and report whether any
 * of them is still incomplete afterwards.
 *
 * For group-by-normal-column and interval queries this is done once per closed window result,
 * with the output buffers bound to that window first; otherwise it is done once on the
 * current contexts.
 *
 * @param pRuntimeEnv  runtime environment holding the query, contexts and window results
 * @return true if at least one function reports complete == false
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    again = false;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int16_t functId = pQuery->pSelectExpr[k].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[k]);

      SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
      if (!pInfo->complete) {
        again = true;
      }
    }

    return again;
  }

  // for each closed group/window result, advance every column function
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);
    if (!pWinRes->closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int16_t functId = pQuery->pSelectExpr[k].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[k]);

      SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
      if (!pInfo->complete) {
        again = true;
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3501
/**
 * Take a snapshot of the state needed to restore the query after repeat/reverse scans.
 *
 * The snapshot contains the query status, the current window index, `start` as lastKey, a copy
 * of pQuery->window (w) and a copy of the current table's window whose skey is replaced by
 * `start` (curWindow).
 *
 * @param pRuntimeEnv  runtime environment holding the query
 * @param start        start key; must not be beyond the current table's lastKey in scan
 *                     direction (asserted)
 * @return the populated snapshot
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  assert((start <= pCurrent->lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (start >= pCurrent->lastKey && !QUERY_IS_ASC_QUERY(pQuery)));

  SQueryStatusInfo snapshot = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
  };

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pCurrent->win);

  snapshot.curWindow.skey = start;

  return snapshot;
}

3522 3523 3524 3525
/**
 * Switch the runtime environment into reverse-scan mode.
 *
 * Saves the TS buffer cursor into pStatus->cur, flips the TS buffer order (when a TS buffer
 * exists), sets pQuery->window to the swapped pStatus->curWindow, flips the query order, marks
 * the scan as REVERSE_SCAN, resets the query status, updates contexts and per-table ranges, and
 * replaces pSecQueryHandle with a new handle for the reversed window.
 * Does not return normally if the new handle cannot be created: longjmp() to pRuntimeEnv->env
 * with terrno.
 *
 * @param pRuntimeEnv  runtime environment to switch
 * @param pStatus      snapshot taken before the scan; cur is written, curWindow is read
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    bool moved = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(moved);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch the window must be consistent with the new direction
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3571 3572
/**
 * Undo setEnvBeforeReverseScan(): flip the query and context order back, restore the TS buffer
 * cursor, mark the scan as MASTER_SCAN and restore lastKey, status and the time windows from
 * the snapshot.
 *
 * @param pRuntimeEnv  runtime environment to restore
 * @param pStatus      snapshot holding cur, lastKey, status and w
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the scan position and status recorded before the reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // restore the original query window on both the table and the query
  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3593 3594 3595 3596 3597 3598 3599
/**
 * Set lastKey of the first table in the first group to pCond->twindow.skey.
 * Only valid when the group info holds exactly one table (asserted).
 *
 * @param pTableGroupInfo  table group info whose single table key entry is updated
 * @param pCond            query condition providing the window start key
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstKey = taosArrayGet(pFirstGroup, 0);

  pFirstKey->lastKey = pCond->twindow.skey;
}

3600
/**
 * Scan the data blocks of the current table, repeating the scan while any output function asks
 * for another pass, and finally perform one reverse scan when the query needs it.
 *
 * May leave through longjmp() to pRuntimeEnv->env: with terrno when a secondary query handle
 * cannot be created, or with TSDB_CODE_TSC_QUERY_CANCELLED when the query was killed.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key of the scan, stored in the status snapshot
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (qstatus.lastKey != pCurrent->lastKey) {
        qstatus.curWindow.ekey = pCurrent->lastKey - step;
      }

      qstatus.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    // another pass is required: rebuild the secondary handle over the range scanned so far
    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv);  // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (!needReverseScan(pQuery)) {
    return;
  }

  setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

  // reverse scan from current position
  qDebug("QInfo:%p start to reverse scan", pQInfo);
  doScanAllDataBlocks(pRuntimeEnv);

  clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
}

H
hjxilinx 已提交
3681
/**
 * Run the xFinalize() callback of every output function.
 *
 * For group-by-normal-column and interval queries this is done for each closed window result
 * (all windows are closed first for group-by-normal-column), and the number of result rows of
 * the window is stored in the window result. Otherwise the callbacks run once on the current
 * contexts.
 *
 * @param pRuntimeEnv  runtime environment holding the query, contexts and window results
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      aAggs[pQuery->pSelectExpr[k].base.functionId].xFinalize(&pRuntimeEnv->pCtx[k]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pWinRes = &pWindowResInfo->pResult[w];
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      aAggs[pQuery->pSelectExpr[k].base.functionId].xFinalize(&pRuntimeEnv->pCtx[k]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows
     * usually is 1 except the top and bottom query
     */
    pWinRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/**
 * Check whether the query selects at least one function other than ts, tag and tagprj.
 *
 * @param pQuery  query definition
 * @return true if such an output column exists, false otherwise
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    switch (pQuery->pSelectExpr[k].base.functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
        break;  // auxiliary column, keep looking
      default:
        return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3729
/**
 * Initialize a STableQueryInfo inside caller-provided storage.
 *
 * The time window, lastKey (= win.skey), table pointer and cursor vgroup index (-1) are set.
 * For interval and group-by-normal-column queries the per-table window result info is also
 * initialized; other queries leave it untouched.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table handle stored in the info
 * @param win          query time window for this table
 * @param buf          storage that is used as the STableQueryInfo
 * @return buf as STableQueryInfo*, or NULL when initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;

  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  // in other aggregate query, do not initialize the windowResInfo
  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol)) {
    return pInfo;
  }

  // set more initial size of interval/groupby query
  const int32_t initialSize = 16;
  const int32_t initialThreshold = 100;

  int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  if (code != TSDB_CODE_SUCCESS) {
    return NULL;
  }

  return pInfo;
}

H
Haojun Liao 已提交
3754
/**
 * Release the window result info owned by a STableQueryInfo. A NULL argument is ignored.
 * The STableQueryInfo storage itself is not freed here.
 *
 * @param pTableQueryInfo  table query info to clean up, may be NULL
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3765
 * @param pDataBlockInfo
3766
 */
H
Haojun Liao 已提交
3767
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3768
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3769 3770 3771
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3772 3773
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3774 3775 3776 3777

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3778

H
Haojun Liao 已提交
3779 3780 3781
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3782

3783 3784
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3785 3786 3787
  if (pWindowRes == NULL) {
    return;
  }
3788

3789 3790 3791 3792 3793
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3794
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3795 3796 3797 3798
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3799

H
Haojun Liao 已提交
3800 3801
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3802 3803 3804 3805
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3806
/**
 * Bind every function context to the storage of one window result.
 *
 * For each output column the output pointer is set to the column's position inside the result
 * page of pResult, resultInfo is set to the window's entry, and the super-table flag is copied
 * from the runtime environment. top/bottom/diff additionally get ptsOutputBuf pointed at the
 * output buffer of the first column.
 *
 * @param pRuntimeEnv  runtime environment holding the query, contexts and result buffer
 * @param pResult      window result to bind
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];
    pFuncCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, k, pResult, pPage);

    switch (pQuery->pSelectExpr[k].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pFuncCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pFuncCtx->resultInfo = &pResult->resultInfo[k];

    // set super table query flag
    SResultInfo *pInfo = GET_RES_INFO(pFuncCtx);
    pInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3833 3834
/**
 * Bind every function context to the storage of one window result and initialize contexts
 * that have not been initialized yet.
 *
 * Columns whose result info is both initialized and complete only get resultInfo re-attached;
 * their output pointers are left as they are. For the others the output pointer, stage,
 * timestamp output pointer (top/bottom/diff) and super-table flag are set, and init() is
 * called when the result info is not initialized.
 *
 * @param pRuntimeEnv  runtime environment holding the query, contexts and result buffer
 * @param pResult      window result to bind
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

    pFuncCtx->resultInfo = &pResult->resultInfo[k];
    if (pFuncCtx->resultInfo->initialized && pFuncCtx->resultInfo->complete) {
      continue;
    }

    pFuncCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, k, pResult, pPage);
    pFuncCtx->currentStage = 0;

    int32_t functionId = pFuncCtx->functionId;
    switch (functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pFuncCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pFuncCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;  // set super table query flag

    if (!pFuncCtx->resultInfo->initialized) {
      aAggs[functionId].init(pFuncCtx);
    }
  }
}

3867
/**
 * Set the tag values for the given table and, when a TS buffer is in use, position the TS
 * buffer on the data belonging to this table's tag.
 *
 * On the first call for a table (cur.vgroupIndex == -1) the start position is looked up by
 * vnode id and tag value and the resulting cursor is stored in pTableQueryInfo->cur; on later
 * calls the stored cursor is applied to the TS buffer again.
 *
 * @param pQInfo           query info owning the runtime environment
 * @param pTable           table handle passed to setTagVal()
 * @param pTableQueryInfo  per-table state holding the tag copy and the TS buffer cursor
 * @return TSDB_CODE_SUCCESS on success (also when no TS buffer exists); -1 when the TS buffer
 *         holds no data for the tag value. The original code returned `false` (== 0) here,
 *         which callers could not tell apart from success.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return TSDB_CODE_SUCCESS;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;
  bool      isStrTag = (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR);

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);
    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (elem.vnode < 0) {
      if (isStrTag) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return -1;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  }

  // the same position report is emitted for a fresh lookup and for a restored cursor
  if (isStrTag) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return TSDB_CODE_SUCCESS;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3922
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
  SQuery          *pQuery = pQInfo->runtimeEnv.pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  // Case 2: the range was set on an earlier call, so only the progress key moves.
  if (pTableInfo->queryRangeSet) {
    pTableInfo->lastKey = key;
    return;
  }

  // Case 1: first call for this table, establish the range starting at `key`.
  pTableInfo->win.skey = key;

  // An inverted query window (relative to the scan direction) cannot hold data; leave the range unset.
  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);
  const bool emptyRange = ascending ? (pQuery->window.ekey < pQuery->window.skey)
                                    : (pQuery->window.skey < pQuery->window.ekey);
  if (emptyRange) {
    return;
  }

  /*
   * For both ascending and descending super table queries the first qualified timestamp of this table
   * is needed to derive the aligned start window. In ascending order `key` is that timestamp; in
   * descending order the bounds are normalized (lower/upper) before alignment.
   */
  STimeWindow range = {.skey = key, .ekey = pQuery->window.ekey};
  TSKEY       lower = MIN(range.skey, range.ekey);
  TSKEY       upper = MAX(range.skey, range.ekey);

  STimeWindow aligned = TSWINDOW_INITIALIZER;
  getAlignQueryTimeWindow(pQuery, range.skey, lower, upper, &aligned);

  SWindowResInfo *pWinInfo = &pTableInfo->windowResInfo;
  pWinInfo->startTime = pTableInfo->win.skey;  // may legitimately be 0 for 1970 timestamps

  if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
    if (!ascending) {
      assert(range.ekey == pQuery->window.ekey);
    }

    pWinInfo->prevSKey = aligned.skey;
  }

  pTableInfo->queryRangeSet = 1;
  pTableInfo->lastKey = pTableInfo->win.skey;
}

/*
 * Returns true when at least one output expression uses a function whose aAggs[] status has the
 * TSDB_FUNCSTATE_NEED_TS bit set; false otherwise (including when there are no outputs).
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    ++col;
  }

  return false;
}

/*
 * Decides whether the primary timestamp column of the given block has to be loaded.
 * It is needed when:
 *  1. the current table's lastKey or the query end key falls inside the block's time window
 *     (the precise row position must then be located), or
 *  2. requireTimestamp() reports that one of the output functions needs timestamps.
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;

  TSKEY lastKey = pQuery->current->lastKey;
  if (lastKey >= pBlockWin->skey && lastKey <= pBlockWin->ekey) {
    return true;
  }

  TSKEY endKey = pQuery->window.ekey;
  if (endKey >= pBlockWin->skey && endKey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3990
/*
 * Copies rows of closed window results into the query output buffers (pQuery->sdata), walking the
 * windows forward for TSDB_ORDER_ASC and backward otherwise. Copying resumes from
 * pQInfo->groupIndex / pQInfo->groupResInfo.pos.rowId and stops once pQuery->rec.capacity rows
 * have been produced; both cursors are updated so the next call continues where this one stopped.
 *
 * Returns the number of rows copied by this call.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  const int32_t  numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;
  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;

  // Direction of traversal and the first window to visit.
  const bool    ascending = (orderType == TSDB_ORDER_ASC);
  const int32_t stride = ascending ? 1 : -1;
  int32_t       idx = ascending ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  int32_t copied = 0;
  for (; idx >= 0 && idx < numOfClosed; idx += stride) {
    SWindowResult *pWin = &pWindows[idx];

    // Empty window: nothing to copy, move the group cursor on.
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->pos.rowId = 0;
      continue;
    }

    int32_t rowsLeft = pWin->numOfRows - pGroupResInfo->pos.rowId;
    int32_t srcRow = pGroupResInfo->pos.rowId;

    if (rowsLeft > pQuery->rec.capacity - copied) {
      // Output space cannot hold the rest of this window: copy a part and remember the position.
      rowsLeft = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->pos.rowId += rowsLeft;
    } else {
      // The whole remainder fits: this window is finished.
      pGroupResInfo->pos.rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pRuntimeEnv, col, pWin, page);
      memcpy(dst, src + srcRow * width, width * rowsLeft);
    }

    copied += rowsLeft;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData supports copying data in ascending/descending order.
 * For interval queries on both super tables and tables, the data is copied in ascending order, since the output
 * results are already ordered in SWindowResult. When handling group-by queries for both tables and super tables,
 * all group results are already completed.
 *
 * @param pQInfo
 * @param pResultInfo
 */
4067
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // Without a group-by expression the results are copied in ascending order.
  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  int32_t copied = doCopyToSData(pQInfo, pResultInfo, orderType);
  pQuery->rec.rows += copied;

  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

H
Haojun Liao 已提交
4078
/*
 * For non-interval queries, raises each window result's numOfRows to the largest numOfRes reported
 * by its output functions. TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ outputs are ignored.
 * Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWinInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWinInfo->size; ++w) {
    SWindowResult *pResult = &pWinInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
      if (!skipped) {
        pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4100
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4101
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4102
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4103
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4104

4105
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4106
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4107

H
Haojun Liao 已提交
4108
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4109
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4110
  } else {
4111
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4112 4113 4114
  }
}

H
Haojun Liao 已提交
4115
/*
 * Tells whether a (non super table) query still has results to hand to the client.
 *
 *  - Once a positive limit has been reached there is nothing more to return.
 *  - Fill queries (other than point interpolation): true while the fill buffer still holds rows;
 *    when the query has completed, true if the filler can still generate rows up to the end of the
 *    query window. While the query is not completed and no rows remain, the gap between result
 *    blocks is handled after the next data block arrives, so false is returned.
 *  - Other queries: true only for completed group-by-normal-column or interval queries that still
 *    have window results.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  const bool fillPending = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillPending) {
    // results that are waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // Rows not yet returned must be filled and delivered first.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serializes the current result set into the response buffer `data` and updates the query status.
 *
 * Layout written to `data`:
 *   1. for every output column, `bytes * numOfRows` bytes taken from pQuery->sdata[col]->data;
 *   2. the number of entries in pQInfo->arrTableIdInfo as an int32 in network byte order;
 *   3. one STableIdInfo per entry, with uid/key converted by htobe64() and tid by htonl().
 *
 * The write position after the column data is an arbitrary byte offset, so the count and the
 * STableIdInfo records are assembled in local variables and copied with memcpy() instead of being
 * stored through a casted pointer (which would be a potentially misaligned access).
 *
 * After copying, a completed query is moved to QUERY_OVER when no further results remain.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy for each output column
 * @param data       destination buffer; the caller must provide enough space
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    // compute the length in size_t so that large results do not overflow int arithmetic
    size_t len = (size_t)bytes * (size_t)numOfRows;
    memmove(data, pQuery->sdata[col]->data, len);
    data += len;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = (int32_t)htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst = {0};
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4192
/*
 * Generates filled result rows into pQuery->sdata and applies the pending OFFSET to them.
 *
 * Each round produces up to pQuery->rec.capacity rows:
 *  - offset already consumed: the rows are returned as they are;
 *  - offset smaller than the batch: the leading `offset` rows are dropped by shifting the rest to
 *    the start of each pDst[i] buffer, offset becomes 0 and the remaining count is returned;
 *  - otherwise the whole batch is discarded, offset is reduced, and another round is attempted
 *    while queryHasRemainResForTableQuery() reports more results (0 is returned when it does not).
 *
 * `numOfFilled` is not used by this function.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* the start position according to the offset value has been reached, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= (int32_t)pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        SExprInfo *pExpr = &pQuery->pSelectExpr[col];
        char      *base = pDst[col]->data;

        memmove(base, base + pExpr->bytes * pQuery->limit.offset, ret * pExpr->bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // The whole batch is consumed by the offset.
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;
    ret = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return ret;
    }
  }
}

4237
/*
 * Adds the first-stage merge time to the elapsed time of the query's cost summary and writes the
 * summary to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // the merge time counts towards the total elapsed time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: internal size:%"PRId64"B, numOfWin:%"PRId64, pQInfo, pCost->internalSupSize,
         pCost->numOfTimeWindows);
}

4253 4254
/*
 * Consumes the remaining OFFSET inside the given block.
 *
 * When the offset equals the number of rows in the block, the block is skipped entirely: lastKey is
 * moved one step past the block boundary in scan direction and the offset is cleared. Otherwise
 * pQuery->pos is set to the first row after the skipped rows, lastKey is set to that row's
 * timestamp, the offset is cleared and the query functions are applied to the rest of the block.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  int32_t    direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  if (pQuery->limit.offset == pBlockInfo->rows) {  // the current block is skipped completely
    TSKEY boundary = ascending ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pTableInfo->lastKey = boundary + direction;
    pQuery->limit.offset = 0;
    return;
  }

  if (ascending) {
    pQuery->pos = (int32_t)pQuery->limit.offset;
  } else {
    pQuery->pos = pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // continue from the timestamp of the first row that is not skipped
  TSKEY *tsList = (TSKEY *) pTsCol->pData;
  pTableInfo->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4288

4289 4290 4291 4292 4293
/*
 * Skips whole data blocks to honor the query OFFSET. Nothing is done when there is no positive
 * offset or when filter columns are present.
 *
 * Blocks holding fewer rows than the remaining offset are dropped and lastKey is advanced past
 * them; the first block that can absorb the rest of the offset is handed to updateOffsetVal().
 * If the query has been killed, or terrno is set after iterating, control leaves through
 * longjmp(pRuntimeEnv->env, ...).
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t          direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  const bool       ascending = QUERY_IS_ASC_QUERY(pQuery);
  STableQueryInfo *pTableInfo = pQuery->current;
  TsdbQueryHandleT pHandle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pHandle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    if (pQuery->limit.offset <= blockInfo.rows) {
      // the start position lies in (or right at the end of) this block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is skipped
    pQuery->limit.offset -= blockInfo.rows;
    pTableInfo->lastKey = (ascending ? blockInfo.window.ekey : blockInfo.window.skey) + direction;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4328

H
Haojun Liao 已提交
4329
/*
 * Honors the OFFSET of an interval query by skipping whole time windows.
 *
 * `*start` is first set to the current table's lastKey. Nothing is skipped when there is no positive
 * offset, or when filter columns, a ts buffer (pTSBuf) or fill info are present. Otherwise data
 * blocks are walked and the offset is decremented once per time window that ends inside the
 * current block. When the offset reaches 0:
 *   - if the next window overlaps the current block, the block is loaded, the query start
 *     (pQuery->pos, window.skey, *start) is moved to the first row of that window and the query
 *     functions are applied to the rest of the block;
 *   - otherwise only the start keys are moved to the next window.
 *
 * Always returns true. Leaves through longjmp(pRuntimeEnv->env, terrno) when terrno is set after
 * the block iteration ends.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  const bool nothingToSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0) ||
                             (pRuntimeEnv->pTSBuf != NULL) || (pRuntimeEnv->pFillInfo != NULL);
  if (nothingToSkip) {
    return true;
  }

  /*
   * For an interval query without interpolation, one time window is forwarded at a time,
   * pQuery->limit.offset times. Because holes may exist in the data,
   * pQuery->interval.interval * pQuery->limit.offset cannot be used to jump directly.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableInfo = pQuery->current;
  const bool       ascending = QUERY_IS_ASC_QUERY(pQuery);

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!ascending) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // a window that ends inside the current block counts as one skipped window
      const bool winEndsInBlock =
          ascending ? (win.ekey <= blockInfo.window.ekey) : (win.ekey >= blockInfo.window.skey);
      if (winEndsInBlock) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      GET_NEXT_TIMEWINDOW(pQuery, &tw);

      const bool nextWinInBlock =
          ascending ? (tw.skey <= blockInfo.window.ekey) : (tw.ekey >= blockInfo.window.skey);

      if (!nextWinInBlock) {
        if (pQuery->limit.offset == 0) {
          // offset consumed and the next window starts in a later block: only move the start keys
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pWinInfo->prevSKey = tw.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still overlaps the current data block: load the block and find the start
       * position of the next queried time window. Only the primary timestamp column is actually needed.
       * TODO: optimize performance; blocks need to be loaded only if the first actually required
       * time window resides in the current data block.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      tw = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (pQuery->limit.offset == 0) {
        // reset the query start timestamp
        pTableInfo->win.skey = ((TSKEY *)pTsCol->pData)[startPos];
        pQuery->window.skey = pTableInfo->win.skey;
        *start = pTableInfo->win.skey;

        pWinInfo->prevSKey = tw.skey;
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      // more windows to skip: continue from the located window
      pTableInfo->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pWinInfo->prevSKey = tw.skey;
      win = tw;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4455 4456
// Forward declaration; being static, the definition lives elsewhere in this translation unit.
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4457
/*
 * Creates the TSDB query handle (pRuntimeEnv->pQueryHandle) for the query.
 *
 * No handle is created for tag-only queries, nor for super table queries that are neither interval
 * queries nor fixed-output queries; TSDB_CODE_SUCCESS is returned in both cases.
 *
 * The scan condition uses the query window, except for an ascending single-table query that is not
 * an interval, group-by-normal-column or fixed-output query, where the table's own window is used.
 * For first/last-row queries the query window and every table's window/lastKey are refreshed from
 * the condition after the handle has been created.
 *
 * Returns terrno as left by the TSDB call (it is reset to TSDB_CODE_SUCCESS beforehand).
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  if (!isSTableQuery && (pQInfo->tableqinfoGroupInfo.numOfTables == 1) && (cond.order == TSDB_ORDER_ASC) &&
      (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isGroupbyNormalCol(pQuery->pGroupbyExpr)) &&
      (!isFixedOutputQuery(pRuntimeEnv))) {
    // plain ascending scan of a single table: scan that table's own window
    SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

    // update the query time window
    pQuery->window = cond.twindow;

    if (pQInfo->tableGroupInfo.numOfTables == 0) {
      pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    } else {
      // propagate the refreshed window to every table of every group
      size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

      for (size_t g = 0; g < numOfGroups; ++g) {
        SArray *pGroup = GET_TABLEGROUP(pQInfo, g);
        size_t  numOfTables = taosArrayGetSize(pGroup);

        for (size_t t = 0; t < numOfTables; ++t) {
          STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, t);

          pTableInfo->win = pQuery->window;
          pTableInfo->lastKey = pTableInfo->win.skey;
        }
      }
    }
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }

  return terrno;
}

4521 4522 4523
/*
 * Builds one SFillColInfo per output column of the query. Column offsets are the running sum of the
 * preceding columns' byte widths; the fill value of each column is taken from pQuery->fillVal.
 *
 * Returns a calloc()-allocated array of pQuery->numOfOutput entries, or NULL when the allocation
 * fails. The array is handed back to the caller.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t runningOffset = 0;

  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo    *pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo *pCol = &pFillCol[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = runningOffset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    runningOffset += pExpr->bytes;
  }

  return pFillCol;
}

4546
/*
 * Initializes the runtime state of a query before execution:
 *   1. caches query traits (top/bottom, tag output) and applies the scan limitation;
 *   2. creates the TSDB query handle;
 *   3. records tsdb/vgId and fills in the runtime environment fields;
 *   4. aligns the traverse order of the ts buffer (if any) with the query order;
 *   5. sets up the runtime environment, the disk-based result buffer and the window result info,
 *      depending on whether this is a super table, group-by-normal-column or interval query;
 *   6. creates the fill info for fill queries (other than point interpolation);
 *   7. marks the query as QUERY_NOT_COMPLETED.
 *
 * Returns TSDB_CODE_SUCCESS, or the first error code reported by one of the setup steps.
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t           code = TSDB_CODE_SUCCESS;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  if ((code = setupQueryHandle(tsdb, pQInfo, isSTableQuery)) != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  if ((code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order)) != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t pageSize = DEFAULT_PAGE_SIZE;
  int32_t rowSize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowSize);

  int32_t bufLimit = 1024*1024*2;  // 2 MB, passed to the disk-based result buffer

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowSize, pageSize, bufLimit, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t keyType = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        keyType = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, keyType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowSize);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowSize, pageSize, bufLimit, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t keyType = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      keyType = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, keyType);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // NOTE(review): taosCreateFillColInfo() returns NULL on allocation failure and neither that nor
    // the result of taosInitFillInfo() is checked here -- confirm how the fill module copes with NULL.
    SFillColInfo *pColInfo = taosCreateFillColInfo(pQuery);
    STimeWindow   aligned = TSWINDOW_INITIALIZER;

    TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &aligned);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, aligned.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4652
/*
 * Clear the "complete" flag in the result info of every output function context, so that the
 * functions are executed again when the next table is processed.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t idx = 0; idx < numOfOutput; ++idx) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pInfo == NULL) {
      continue;  // this context has no result info attached
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679
/*
 * Prepare the execution environment before a data block of the given table is processed.
 *
 * - non-interval query: set the execution context of the table's group, using the key that
 *   follows the end of the block (in scan direction) as the next key;
 * - interval query: set the interval query range from the start key of the block and, when tag
 *   results or a ts buffer are involved, set the additional per-table information.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
    return;
  }

  // interval query
  setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
}

H
Haojun Liao 已提交
4680
/*
 * Iterate over all data blocks delivered by the active query handle (the primary handle during
 * the master scan, the secondary handle otherwise) and apply the query functions on each block.
 *
 * The loop stops early when the table of a block is not found in the table query info map or
 * when loading the block fails. The function leaves through longjmp() when the query is killed
 * or when terrno holds an error code after the loop.
 *
 * @param pQInfo  query info object
 * @return        elapsed time of the scan in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // locate the query info of the table that owns this block by its table id
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    STableQueryInfo *pTableInfo = *ppTableInfo;
    pQuery->current = pTableInfo;

    // the time window of the table must lie inside the query window and follow the scan order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert((pTableInfo->win.skey <= pTableInfo->win.ekey) &&
             (pTableInfo->lastKey >= pTableInfo->win.skey) &&
             (pTableInfo->win.skey >= pQuery->window.skey && pTableInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert((pTableInfo->win.skey >= pTableInfo->win.ekey) &&
             (pTableInfo->lastKey <= pTableInfo->win.skey) &&
             (pTableInfo->win.skey <= pQuery->window.skey && pTableInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pTableInfo, &blockInfo);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pHandle, &blockInfo, &pStatis,
                                         &pDataBlock, &blockStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // the block is dropped as a whole, only move the last key beyond the block
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4755 4756
/*
 * Prepare the runtime environment to scan one single table of the first table group.
 *
 * A new query handle that covers only the table at position `index` is created (an existing
 * primary handle is released first), the tag value is set when required, and, if a ts buffer
 * exists, its cursor is positioned on the data that belongs to the tag of the table.
 *
 * @param pQInfo  query info object
 * @param index   index of the table in the first table group
 * @return        true if the table is ready to be scanned; false if the ts buffer holds no data
 *                for the tag value of this table. The function leaves through longjmp() if the
 *                query handle cannot be created.
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }

  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);

  // resume from the last key that has been checked for this table
  STsdbQueryCond cond = {
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list that contains only the current table
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);

  taosArrayPush(g1, &tx);
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};

  // release the handle of the previous table before a new one is created
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

    if (pRuntimeEnv->cur.vgroupIndex == -1) {
      // no cursor has been saved yet, locate the start position of the tag in the ts buffer
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);

      // failed to find data with the specified tag value and vnodeId
      if (elem.vnode < 0) {
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

        tVariantDestroy(&elem.tag);
        return false;
      }

      STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);

      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
               cur.blockIndex, cur.tsIndex);
      } else {
        qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
               cur.blockIndex, cur.tsIndex);
      }

      // release the tag of the element on the success path as well, like all other paths do
      tVariantDestroy(&elem.tag);
    } else {
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

      if (tVariantCompare(&elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
        // the element at the current position belongs to another tag, search for the tag of this table
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);

        // failed to find data with the specified tag value and vnodeId
        if (elem1.vnode < 0) {
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }

          tVariantDestroy(&elem.tag);
          tVariantDestroy(&elem1.tag);

          return false;
        }

        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }

        tVariantDestroy(&elem1.tag);
      } else {
        // same tag as before, continue from the cursor saved in the runtime environment
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
      }

      tVariantDestroy(&elem.tag);
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4879
static void sequentialTableProcess(SQInfo *pQInfo) {
4880
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4881
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4882
  setQueryStatus(pQuery, QUERY_COMPLETED);
4883

H
Haojun Liao 已提交
4884
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4885

H
Haojun Liao 已提交
4886
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4887 4888
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4889

4890
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4891
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4892

S
TD-1057  
Shengliang Guan 已提交
4893
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4894
             numOfGroups, group);
H
Haojun Liao 已提交
4895 4896 4897 4898 4899 4900 4901

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4902 4903
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4904 4905 4906
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4907

H
Haojun Liao 已提交
4908 4909 4910 4911 4912 4913 4914
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4915

4916
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4917
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4918
      } else {
H
Haojun Liao 已提交
4919
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4920
      }
B
Bomin Zhang 已提交
4921 4922 4923 4924 4925 4926

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4927

H
Haojun Liao 已提交
4928
      initCtxOutputBuf(pRuntimeEnv);
4929

4930
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4931
      assert(taosArrayGetSize(s) >= 1);
4932

4933
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4934 4935 4936
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4937

dengyihao's avatar
dengyihao 已提交
4938
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4939

H
Haojun Liao 已提交
4940
      // here we simply set the first table as current table
4941 4942 4943
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4944
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4945

H
Haojun Liao 已提交
4946 4947 4948 4949 4950
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4951

H
Haojun Liao 已提交
4952 4953 4954 4955 4956
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4957 4958 4959 4960 4961 4962

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4963
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4964
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4965
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4966

S
TD-1057  
Shengliang Guan 已提交
4967
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4968 4969 4970 4971 4972 4973 4974

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4975 4976
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4989
      // no need to update the lastkey for each table
4990
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4991

B
Bomin Zhang 已提交
4992 4993
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4994 4995 4996
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4997

4998
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4999 5000
      assert(taosArrayGetSize(s) >= 1);

5001
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5002 5003 5004 5005 5006 5007 5008 5009

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
5010
      taosArrayDestroy(s);
5011 5012 5013 5014 5015
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5016
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
5017 5018 5019

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5020
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes));
5021 5022 5023
        }
      }

5024
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5025 5026 5027 5028 5029 5030 5031
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5032
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5033 5034 5035 5036 5037 5038

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5039 5040 5041
    }
  } else {
    /*
5042
     * 1. super table projection query, 2. ts-comp query
5043 5044 5045
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5046
    if (pQInfo->groupIndex > 0) {
5047
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5048
      pQuery->rec.total += pQuery->rec.rows;
5049

5050
      if (pQuery->rec.rows > 0) {
5051 5052 5053
        return;
      }
    }
5054

5055
    // all data have returned already
5056
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5057 5058
      return;
    }
5059

5060 5061
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5062

H
Haojun Liao 已提交
5063
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5064 5065
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5066

5067
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5068
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5069
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5070
      }
5071

5072
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5073
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5074
        pQInfo->tableIndex++;
5075 5076
        continue;
      }
5077

H
hjxilinx 已提交
5078
      // TODO handle the limit offset problem
5079
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5080 5081
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5082 5083 5084
          continue;
        }
      }
5085

5086
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5087
      skipResults(pRuntimeEnv);
5088

5089
      // the limitation of output result is reached, set the query completed
5090
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5091
        SET_STABLE_QUERY_OVER(pQInfo);
5092 5093
        break;
      }
5094

5095 5096
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5097

5098
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5099 5100 5101 5102 5103 5104
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5105
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5106

H
Haojun Liao 已提交
5107
        STableIdInfo tidInfo = {0};
5108

H
Haojun Liao 已提交
5109 5110 5111
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5112
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5113 5114
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5115
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5116
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5117 5118
          break;
        }
5119

H
Haojun Liao 已提交
5120 5121 5122 5123
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5124
      } else {
5125
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5126 5127
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5128 5129
          continue;
        } else {
5130 5131 5132
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5133 5134 5135
        }
      }
    }
H
Haojun Liao 已提交
5136

5137
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5138 5139
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5140
  }
5141

5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5154
    finalizeQueryResult(pRuntimeEnv);
5155
  }
5156

5157 5158 5159
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5160

5161
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5162 5163
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5164
      pQuery->limit.offset);
5165 5166
}

5167 5168 5169 5170
/*
 * Switch the runtime environment from the master scan to the reverse scan:
 * the scan flag is set to reverse, the query time window and the scan order are inverted, the
 * function contexts are prepared for the reverse scan, and a secondary query handle that covers
 * all table groups is created with the inverted window/order.
 *
 * The function leaves through longjmp() if the secondary query handle cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // the condition is built after the swap, so it carries the inverted order and time window
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle, and do not keep a dangling pointer until the new handle is assigned
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5204 5205 5206 5207
/*
 * Switch the runtime environment back to the master scan after the reverse scan: the order of
 * the ts buffer cursor (if any) and the query time window are inverted again, the order of the
 * function contexts is switched, and the scan flag is set to master scan.
 *
 * NOTE(review): pQuery->order.order is not switched here, although doSaveContext() switches
 * it - confirm that this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5218 5219 5220
/*
 * Close all time windows once a scan is completed.
 *
 * For an interval query the window result info of every table in every table group is closed;
 * otherwise only the window result info of the runtime environment (the group result) is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (size_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pTables);

    for (size_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Abort the execution through longjmp() if an error code has been recorded in the query info
 * or if the query has been killed; return normally otherwise.
 *
 * NOTE(review): the jump always carries TSDB_CODE_TSC_QUERY_CANCELLED, even if pQInfo->code
 * holds a different error code - confirm that masking the original code is intended.
 */
static void abortIfMultiTableQueryFailed(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_SUCCESS && !IS_QUERY_KILLED(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
  finalizeQueryResult(&pQInfo->runtimeEnv); // clean up allocated resource during query
  longjmp(pQInfo->runtimeEnv.env, TSDB_CODE_TSC_QUERY_CANCELLED);
}

/*
 * Query handler for multiple tables: run the master scan over all data blocks, close the time
 * windows, run the reverse scan if it is required, and finally copy the results into the
 * output buffer.
 *
 * If pQInfo->groupIndex is larger than 0 on entry, no scan is performed and only the remaining
 * results are copied into the output buffer.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  /*
   * if the groupIndex > 0, the query process must be completed yet, we only need to
   * copy the data into output buffer
   */
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // do check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  abortIfMultiTableQueryFailed(pQInfo);

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  abortIfMultiTableQueryFailed(pQInfo);

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery)) {
    // the results are copied only if they have been merged successfully
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {  // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5317
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5318
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5319

H
hjxilinx 已提交
5320
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5321
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5322 5323
    return;
  }
5324

H
hjxilinx 已提交
5325
  pQuery->current = pTableInfo;  // set current query table info
5326

5327
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5328
  finalizeQueryResult(pRuntimeEnv);
5329

H
Haojun Liao 已提交
5330
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5331 5332
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5333
  }
5334

H
Haojun Liao 已提交
5335
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5336
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5337

5338
  skipResults(pRuntimeEnv);
5339
  limitResults(pRuntimeEnv);
5340 5341
}

H
hjxilinx 已提交
5342
/*
 * Multi-output query on a single table: scan repeatedly until at least one row is produced
 * or the query is completed, then apply the limit. When the query is completed, the last
 * key of the table is recorded in pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->current = pTableInfo;

  // the output buffer of a ts_comp query must not be re-initialized
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // when no filter condition is present, blocks are skipped without loading their data from file
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);

    // with filter columns the offset can only be applied to the produced rows
    if (pQuery->rec.rows > 0 && pQuery->numOfFilterCols > 0 && pQuery->limit.offset > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) || pQuery->rec.rows > 0) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    // nothing usable was produced in this round: start over with a clean output buffer
    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // remember the last key of the table that has just been processed
    STableId*    pId = TSDB_TABLEID(pQuery->current->pTable);
    STableIdInfo lastKeyInfo;

    lastKeyInfo.tid = pId->tid;
    lastKeyInfo.uid = pId->uid;
    lastKeyInfo.key = pQuery->current->lastKey;

    taosArrayPush(pQInfo->arrTableIdInfo, &lastKeyInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5402
/*
 * Scan the current table starting at `start`, repeating until the query status reports
 * QUERY_COMPLETED or QUERY_RESBUF_FULL. After each scan, closed time windows may be
 * dropped to consume the pending offset.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // without interpolation the skipped records can simply be dropped here
    // todo handle offset, in case of top/bottom interval query
    if ((pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQuery->limit.offset > 0 &&
        pQuery->fillType == TSDB_FILL_NONE) {
      // maxOutput <= 0, means current query does not generate any results
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // drop at most as many closed windows as the offset still requires
      int32_t numToDrop = (int32_t)(MIN(closedWindows, pQuery->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, numToDrop);
      pQuery->limit.offset -= numToDrop;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5429
// handle time interval query on table
H
hjxilinx 已提交
5430
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5431 5432
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5433 5434
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5435

H
Haojun Liao 已提交
5436
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5437
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5438

5439
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5440
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5441
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5442 5443 5444 5445
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5446
  while (1) {
H
Haojun Liao 已提交
5447
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5448

H
Haojun Liao 已提交
5449
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5450
      pQInfo->groupIndex = 0;  // always start from 0
5451
      pQuery->rec.rows = 0;
5452
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5453

5454
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5455
    }
5456

5457
    // the offset is handled at prepare stage if no interpolation involved
5458
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5459
      limitResults(pRuntimeEnv);
5460 5461
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5462
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5463
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5464
      numOfFilled = 0;
5465

H
Haojun Liao 已提交
5466
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5467
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5468
        limitResults(pRuntimeEnv);
5469 5470
        break;
      }
5471

5472
      // no result generated yet, continue retrieve data
5473
      pQuery->rec.rows = 0;
5474 5475
    }
  }
5476

5477
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5478
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5479
    pQInfo->groupIndex = 0;
5480
    pQuery->rec.rows = 0;
5481
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5482
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5483 5484 5485
  }
}

5486 5487 5488 5489
/*
 * Entry point of a query on exactly one table. Results left over from the previous
 * invocation are returned first; otherwise the query is dispatched to the interval,
 * fixed-output or multi-output processor and the elapsed time is accumulated.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResForTableQuery(pRuntimeEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * Some results were held back because of result interpolation. Keep producing them
       * here instead of launching the retrieve procedure for the next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pRuntimeEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    // no interpolation: try to return rows from the remaining window results
    pQInfo->groupIndex = 0;  // always start from 0
    pQuery->rec.rows = 0;

    if (pRuntimeEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // no window result remains after this batch
        if (pRuntimeEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t st = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray*          pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pTableQueryInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTableQueryInfo);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, pTableQueryInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQueryInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - st);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5551
/*
 * Entry point of a super table query: dispatch to multiTableQueryProcess() or
 * sequentialTableProcess() and add the time spent to the query summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;

  pQuery->rec.rows = 0;

  int64_t st = taosGetTimestampUs();

  // interval queries, and fixed-output queries that are neither point interpolation nor
  // group by normal column, go through the multi-table processor
  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) && (!pRuntimeEnv->groupbyNormalCol));

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - st);
}

5572
/*
 * Locate the source column referenced by an expression.
 *
 * Tag columns are looked up by column id in pTagCols (the table name pseudo column yields
 * TSDB_TBNAME_COLUMN_INDEX), user-defined columns yield TSDB_UD_COLUMN_INDEX, and all other
 * columns are looked up by column id in pQueryMsg->colList.
 *
 * A column id that is not found trips assert(0); with assertions disabled -1 is returned.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t idx = 0; idx < pQueryMsg->numOfTags; ++idx) {
      if (pTagCols[idx].colId == pExprMsg->colInfo.colId) {
        return idx;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t idx = 0; idx < pQueryMsg->numOfCols; ++idx) {
      if (pQueryMsg->colList[idx].colId == pExprMsg->colInfo.colId) {
        return idx;
      }
    }
  }

  // the referenced column does not exist in the message
  assert(0);
  return -1;
}

5603 5604 5605
/*
 * Check the column index resolved for an expression against the number of columns and the
 * number of tags carried by the query message; the index passes when it is below either one.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5608
/*
 * Sanity check of the converted query message header: the interval and the number of group-by
 * columns must not be negative, there must be at least one table, and the number of output
 * columns must lie in [1, TSDB_MAX_COLUMNS]. The first violation is logged and rejected.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  // negative interval
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
    return false;
  }

  // no table to query
  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  // negative number of group-by columns
  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  // number of output columns out of range
  if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/*
 * Validate the number of source columns and tags of a query message.
 *
 * Negative counts, or more than TSDB_MAX_COLUMNS columns and tags in total, are rejected.
 * A message without any source column or tag is accepted only if every output expression is
 * a tag projection, or a tid_tag/count function applied to the table name pseudo column.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: each expression has to be one that needs none
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFunc = pExprMsg[i];

    bool onTbname    = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool isTagPrj    = (pFunc->functionId == TSDB_FUNC_TAGPRJ);
    bool isTidTag    = (pFunc->functionId == TSDB_FUNC_TID_TAG) && onTbname;
    bool isTbnameCnt = (pFunc->functionId == TSDB_FUNC_COUNT) && onTbname;

    if (!(isTagPrj || isTidTag || isTbnameCnt)) {
      return false;
    }
  }

  return true;
}

5654
/*
 * Build *pTableIdList from the numOfTables STableIdInfo records stored at pMsg. Each record
 * is byte-swapped in place inside the message buffer and then appended to the array.
 *
 * Returns the position in the message right after the last record.
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pRecord = (STableIdInfo *)pMsg;
  for (int32_t j = 0; j < pQueryMsg->numOfTables; ++j, ++pRecord) {
    pRecord->tid = htonl(pRecord->tid);
    pRecord->uid = htobe64(pRecord->uid);
    pRecord->key = htobe64(pRecord->key);

    taosArrayPush(*pTableIdList, pRecord);
  }

  return (char *)pRecord;
}
5672

5673
/**
H
hjxilinx 已提交
5674
 * pQueryMsg->head has been converted before this function is called.
5675
 *
H
hjxilinx 已提交
5676
 * @param pQueryMsg
5677 5678 5679 5680
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5681
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5682
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5683 5684
  int32_t code = TSDB_CODE_SUCCESS;

5685 5686 5687 5688
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5689 5690 5691 5692 5693 5694
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5695 5696
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5697

5698 5699
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5700
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5701
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5702 5703

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5704
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5705
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5706 5707 5708
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5709
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5710
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5711
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5712

5713
  // query msg safety check
5714
  if (!validateQueryMsg(pQueryMsg)) {
5715 5716
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5717 5718
  }

H
hjxilinx 已提交
5719 5720
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5721 5722
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5723
    pColInfo->colId = htons(pColInfo->colId);
5724
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5725 5726
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5727

H
hjxilinx 已提交
5728
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5729

H
hjxilinx 已提交
5730
    int32_t numOfFilters = pColInfo->numOfFilters;
5731
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5732
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5733 5734 5735 5736
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5737 5738 5739
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5740
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5741

5742 5743
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5744 5745 5746

      pMsg += sizeof(SColumnFilterInfo);

5747 5748
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5749

5750
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5751 5752 5753 5754 5755
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5756
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5757
        pMsg += (pColFilter->len + 1);
5758
      } else {
5759 5760
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5761 5762
      }

5763 5764
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5765 5766 5767
    }
  }

5768
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5769 5770 5771 5772 5773
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5774
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5775

5776
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5777
    (*pExpr)[i] = pExprMsg;
5778

5779
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5780 5781 5782 5783
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5784

5785
    pMsg += sizeof(SSqlFuncMsg);
5786 5787

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5788
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5789 5790 5791 5792
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5793
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5794 5795 5796 5797 5798
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5799 5800
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5801
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5802 5803
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5804 5805
      }
    } else {
5806
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5807
//        return TSDB_CODE_QRY_INVALID_MSG;
5808
//      }
5809 5810
    }

5811
    pExprMsg = (SSqlFuncMsg *)pMsg;
5812
  }
5813

5814
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5815
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5816
    goto _cleanup;
5817
  }
5818

H
hjxilinx 已提交
5819
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5820

H
hjxilinx 已提交
5821
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5822
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5823 5824 5825 5826
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5827 5828 5829

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5830
      pMsg += sizeof((*groupbyCols)[i].colId);
5831 5832

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5833 5834
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5835
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5836 5837 5838 5839 5840
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5841

H
hjxilinx 已提交
5842 5843
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5844 5845
  }

5846 5847
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5848
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5849 5850

    int64_t *v = (int64_t *)pMsg;
5851
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5852 5853
      v[i] = htobe64(v[i]);
    }
5854

5855
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5856
  }
5857

5858 5859
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5860 5861 5862 5863 5864
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5865 5866
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5867

5868 5869 5870 5871
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5872

5873
      (*tagCols)[i] = *pTagCol;
5874
      pMsg += sizeof(SColumnInfo);
5875
    }
H
hjxilinx 已提交
5876
  }
5877

5878 5879 5880
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5881 5882 5883 5884 5885 5886

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5887 5888 5889
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5890

weixin_48148422's avatar
weixin_48148422 已提交
5891
  if (*pMsg != 0) {
5892
    size_t len = strlen(pMsg) + 1;
5893

5894
    *tbnameCond = malloc(len);
5895 5896 5897 5898 5899
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5900
    strcpy(*tbnameCond, pMsg);
5901
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5902
  }
5903

5904
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5905 5906
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5907
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5908
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5909 5910

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5911 5912

_cleanup:
S
Shengliang Guan 已提交
5913
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5914 5915
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5916 5917 5918 5919
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5920 5921

  return code;
5922 5923
}

H
hjxilinx 已提交
5924
/*
 * Build the expression tree of an arithmetic expression from the binary string carried in
 * the first argument of the expression, and attach it to pArithExprInfo->pExpr.
 *
 * Returns TSDB_CODE_SUCCESS, the code caught by the CATCH block, or TSDB_CODE_QRY_APP_ERROR
 * when no tree was produced.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;

  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    // run the registered cleanup actions, then report the caught code to the caller
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5945
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5946 5947
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5948
  int32_t code = TSDB_CODE_SUCCESS;
5949

H
Haojun Liao 已提交
5950
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5951
  if (pExprs == NULL) {
5952
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5953 5954 5955 5956 5957
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5958
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5959
    pExprs[i].base = *pExprMsg[i];
5960
    pExprs[i].bytes = 0;
5961 5962 5963 5964

    int16_t type = 0;
    int16_t bytes = 0;

5965
    // parse the arithmetic expression
5966
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5967
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5968

5969
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5970
        taosTFree(pExprs);
5971
        return code;
5972 5973
      }

5974
      type  = TSDB_DATA_TYPE_DOUBLE;
5975
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5976
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5977
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5978
      type = s.type;
H
Haojun Liao 已提交
5979
      bytes = s.bytes;
5980 5981
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5982 5983
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5984 5985
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5986 5987 5988 5989 5990

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5991
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5992
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5993

dengyihao's avatar
dengyihao 已提交
5994
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5995 5996 5997 5998
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5999
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6000

H
Haojun Liao 已提交
6001 6002 6003
        type  = s.type;
        bytes = s.bytes;
      }
6004 6005
    }

S
TD-1057  
Shengliang Guan 已提交
6006
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6007
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6008
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
6009
      taosTFree(pExprs);
6010
      return TSDB_CODE_QRY_INVALID_MSG;
6011 6012
    }

6013
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6014
      tagLen += pExprs[i].bytes;
6015
    }
6016
    assert(isValidDataType(pExprs[i].type));
6017 6018 6019
  }

  // TODO refactor
6020
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6021 6022
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6023

6024
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6025
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6026 6027 6028 6029 6030 6031 6032 6033 6034
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6035 6036 6037
    }
  }

6038
  *pExprInfo = pExprs;
6039 6040 6041
  return TSDB_CODE_SUCCESS;
}

6042
/*
 * Create the group-by expression described by the query message.
 *
 * @param pQueryMsg  query message; numOfGroupCols, orderType and orderByIdx are read
 * @param pColIndex  array of numOfGroupCols group-by column descriptors, copied into the result
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *                   left untouched otherwise
 * @return the new expression, or NULL when the query has no group-by column or when an
 *         allocation fails (distinguish the two through *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not hand out an expression without its column array; report it like the failure above
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6066
/**
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo per column of pQuery->colList that
 * carries at least one filter, with a filter callback resolved for every filter element.
 *
 * @param pQInfo  query handle, only used as a tag in log messages
 * @param pQuery  query whose colList is scanned; numOfFilterCols and pFilterInfo are written
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails, or
 *         TSDB_CODE_QRY_INVALID_MSG when a filter or its data type cannot be handled
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array so cleanup code never indexes NULL
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];
    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      bool lowerBounded = (lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER);
      bool upperBounded = (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS);

      if (lowerBounded && upperBounded) {
        // the filter comes from a message: reject it instead of dereferencing a missing table
        if (rangeFilterArray == NULL) {
          qError("QInfo:%p failed to get range filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        // slot selection: 1 = (>, <), 2 = (>=, <), 3 = (>, <=), 4 = (>=, <=)
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          pSingleColFilter->fp = (upper == TSDB_RELATION_LESS_EQUAL) ? rangeFilterArray[4] : rangeFilterArray[2];
        } else {
          pSingleColFilter->fp = (upper == TSDB_RELATION_LESS_EQUAL) ? rangeFilterArray[3] : rangeFilterArray[1];
        }
      } else {  // set callback filter function
        if (filterArray == NULL) {
          qError("QInfo:%p failed to get value filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6159
/**
 * Resolve colInfo.colIndex of every output expression to the position of its column id
 * in pQuery->colList (normal columns) or pQuery->tagColList (all remaining columns).
 *
 * Arithmetic expressions and user-defined constant columns (colId <= TSDB_UD_COLUMN_INDEX)
 * are left untouched.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the handle itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column must always be part of the queried column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is the only id allowed to be absent from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6196 6197
// forward declaration: createQInfoImpl() and initQInfo() call freeQInfo() before its definition further down
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6198 6199 6200
/**
 * Choose the capacity (in rows) of the output buffer and the row threshold derived from it.
 *
 * Projection queries get as many rows as fit into RESULT_MSG_MIN_SIZE bytes, but never fewer
 * than RESULT_MSG_MIN_ROWS; every other query uses a fixed, smaller buffer of 4096 rows.
 * The threshold is RESULT_THRESHOLD_RATIO of the capacity in both cases.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  // default for non-projection queries: a smaller output buffer is enough
  int32_t numOfRows = 4096;

  if (isProjQuery(pQuery)) {
    numOfRows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    if (numOfRows < RESULT_MSG_MIN_ROWS) {
      numOfRows = RESULT_MSG_MIN_ROWS;
    }
  }

  pQuery->rec.capacity  = numOfRows;
  pQuery->rec.threshold = (int32_t)(numOfRows * RESULT_THRESHOLD_RATIO);
}

6217 6218
/**
 * Allocate and populate a query handle (SQInfo) together with its SQuery from a query message.
 *
 * The function takes over pGroupbyExpr, pExprs, pTagCols and the content of *pTableGroupInfo:
 * on success they are referenced by the returned handle, on failure they are released here,
 * either directly by the cleanup labels or through freeQInfo().
 *
 * @param pQueryMsg        decoded query message
 * @param pGroupbyExpr     group-by descriptor, may be NULL
 * @param pExprs           array of pQueryMsg->numOfOutput output expressions
 * @param pTableGroupInfo  tables to query, grouped; copied by value into the handle
 * @param pTagCols         tag column descriptors
 * @param stableQuery      forwarded to changeExecuteScanOrder()
 * @return the new handle, or NULL when an allocation or the filter setup failed
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  // from here on the arguments are reachable through pQuery, so freeQInfo() releases them
  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // colList holds SColumnInfo elements
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // give the query its own copy of the filters; the message's copy is released by the caller
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    assert(pExprs[col].interBytes >= pExprs[col].bytes);

    // allocate additional memory for interResults that are usually larger than final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
    if (pQInfo->tableqinfoGroupInfo.pGroupList == NULL || pQInfo->tableqinfoGroupInfo.map == NULL) {
      goto _cleanup;
    }
  }

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  // calloc(0, n) may legally return NULL; only a failed non-empty allocation is an error
  if (pQInfo->pBuf == NULL && pTableGroupInfo->numOfTables > 0) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo needs to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(0, sizeof(STableIdInfo));
  if (pQInfo->arrTableIdInfo == NULL) {
    goto _cleanup;
  }

  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  // running position inside pQInfo->pBuf: one STableQueryInfo slot per table, over all groups
  int32_t index = 0;

  for (int32_t i = 0; i < (int32_t)numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    // register the group first so that freeQInfo() releases it if a later step fails
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for (int32_t j = 0; j < (int32_t)s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // every table starts from its own last key
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // no handle exists yet, so the table group is still owned by the caller's structure
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // no SQuery exists yet: release the arguments that would otherwise be owned by it
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  taosTFree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  taosTFree(pExprs);

_cleanup:
  // releases the handle and everything already attached to it; ignores a NULL handle
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6396
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6397 6398 6399 6400
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6401

H
hjxilinx 已提交
6402 6403 6404 6405
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6406
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6407 6408 6409
  return (sig == (uint64_t)pQInfo);
}

6410
/**
 * Finish the setup of a query handle created by createQInfoImpl().
 *
 * The query is marked QUERY_COMPLETED right away, and TSDB_CODE_SUCCESS is returned, when the
 * time window is empty for the scan order or when no table is attached to the handle.
 * Otherwise the optional timestamp buffer is built from the message and doInitQInfo() is run.
 *
 * @param pQueryMsg  query message; the ts-comp block is read from it when tsLen > 0
 * @param tsdb       storage handle, used for the timestamp precision and passed to doInitQInfo()
 * @param vgId       passed to tsBufCreateFromCompBlocks() and doInitQInfo()
 * @param pQInfo     handle to initialize; it is freed here when doInitQInfo() fails
 * @param isSTable   passed to doInitQInfo()
 * @return TSDB_CODE_SUCCESS or the error code of doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is created only after the early exits above: they never hand it to anyone,
  // so building it earlier would leak it on those paths.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decreased during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

B
Bomin Zhang 已提交
6454
/**
 * Release an array of column filters, including the buffer referenced by `pz`
 * of every element whose `filterstr` flag is set.
 *
 * @param pFilter       filter array; nothing happens when it is NULL
 * @param numOfFilters  number of elements; nothing happens when it is 0
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    for (int32_t k = 0; k < numOfFilters; ++k) {
      SColumnFilterInfo *pElem = &pFilter[k];
      if (pElem->filterstr) {
        free((void*)(pElem->pz));
      }
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6468 6469
/**
 * Destroy every per-table query info reachable from the group list, then the group arrays,
 * the group list itself and the lookup map, and reset the structure to its empty state.
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pTables = taosArrayGetP(pGroups, g);
      size_t  tableCount = taosArrayGetSize(pTables);

      for (int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pTables, t);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pTables);
    }
  }

  taosArrayDestroy(pGroups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  // leave nothing dangling behind
  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
hjxilinx 已提交
6492 6493 6494 6495
/**
 * Release a query handle and everything attached to it: the runtime environment, the SQuery
 * with its result buffers, filters, expressions, group-by descriptor, tag and column lists,
 * the per-table query infos, the table group and the table-id array.
 *
 * Nothing happens when pQInfo is not a valid handle (see isValidQInfo()), which makes the
 * function safe to call with NULL. The signature is cleared before the handle memory is freed.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        taosTFree(pQuery->sdata[col]);
      }
      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // numOfFilterCols is counted before pFilterInfo is allocated, so the array can still be
    // missing when the handle is torn down after a failed setup
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          taosTFree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo *pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  taosTFree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle so that isValidQInfo() rejects it from now on
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6565
/**
 * Compute the size in bytes of the result that is about to be returned.
 *
 * For a ts-comp query with at least one row the result is a file whose path is stored in
 * the first output buffer: the file size is returned and also written to *numOfRows
 * (0 is returned when the file cannot be inspected). For all other cases the size is
 * rowSize * (*numOfRows).
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6586

H
hjxilinx 已提交
6587 6588 6589
/**
 * Copy the current result of the query into the response buffer and update the row counters.
 *
 * For a ts-comp query the result is a temporary file whose path is stored in the first output
 * buffer: its content is read into data and the file is removed afterwards. Failures to open,
 * measure or read the file are logged; they do not change the return value.
 * For every other query doCopyQueryResultToMsg() is used.
 *
 * The query is marked QUERY_OVER when a ts-comp query had already completed or when the
 * accumulated row count reaches the limit.
 *
 * @param pQInfo  query handle
 * @param data    destination buffer, must be large enough for the whole result
 * @return always TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the size signed so that a failed lseek (-1) is not mistaken for a huge file
      int64_t fileSize = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, fileSize);
      if (fileSize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to seek in ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else {
        int64_t total = 0;

        // read() may return fewer bytes than requested, so keep reading until done
        while (total < fileSize) {
          int64_t  remain = fileSize - total;
          uint32_t chunk = (remain > INT32_MAX) ? (uint32_t)INT32_MAX : (uint32_t)remain;

          int64_t n = (int64_t)read(fd, data + total, chunk);
          if (n < 0 && errno == EINTR) {
            continue;
          }

          if (n <= 0) {  // error or unexpected end of file
            break;
          }

          total += n;
        }

        if (total < fileSize) {  // todo return the error code to client
          qError("QInfo:%p failed to read ts-comp file, path:%s, read:%" PRId64 ", expected:%" PRId64, pQInfo,
                 pQuery->sdata[0]->data, total, fileSize);
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6639 6640 6641 6642 6643 6644 6645
// Bookkeeping for a pool of query handles.
// NOTE(review): the code using this structure is outside this section; the field notes
// marked "presumably" are inferred from names only and need to be confirmed.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vnode that owns the pool — TODO confirm
  bool            closed;         // presumably set once the manager stops accepting handles — TODO confirm
  pthread_mutex_t lock;           // presumably serializes access to the fields above — TODO confirm
} SQueryMgmt;

6646
/**
 * Create and initialize a query handle from a query message.
 *
 * The message is decoded, the output expressions and the group-by descriptor are built, the
 * tables to query are resolved through tsdb, and the handle is created and initialized.
 * All intermediate objects that are not handed over to the handle are released before return,
 * as are the column filters of pQueryMsg->colList.
 *
 * @param tsdb       storage handle, must not be NULL
 * @param vgId       forwarded to initQInfo()
 * @param pQueryMsg  query message, must not be NULL
 * @param pQInfo     out: the new handle on success, NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    // the query type comes from the message: reject it instead of going on with an empty table group
    qError("qmsg:%p invalid query type:%d", pQueryMsg, (int32_t)pQueryMsg->queryType);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);

  // createQInfoImpl() took over these objects, on success as well as on failure
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // still owned here only when an error occurred before createQInfoImpl(); the expression
  // trees hanging off the array have to be destroyed as well, not just the array itself
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }
    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // the handle has already been freed by initQInfo() on failure, but *pQInfo may still hold its address
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6772
/**
 * Print the cost summary of a finished query and release its handle.
 * Handles that fail the isValidQInfo() check are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);

    // print the query cost summary
    queryCostStatis(pQInfo);
    freeQInfo(pQInfo);
  }
}

6783 6784 6785 6786 6787 6788 6789 6790
/**
 * Mark the result of the query as ready, give up ownership of the handle and post the
 * `ready` semaphore.
 *
 * @return true when a response context is attached to the handle at that moment
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // The owner must be cleared inside the locked section: once the handle is put into a task
  // again, another thread may run ahead of the current one.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6802
/**
 * Run one execution round of the query bound to a handle.
 *
 * The calling thread first claims the handle by atomically swapping its thread
 * id into pQInfo->owner. If another owner is already recorded, the call fails
 * and TSDB_CODE_QRY_IN_EXEC is stored in pQInfo->code. Every other exit path
 * goes through doBuildResCheck().
 *
 * @param qinfo  query handle; must be a live SQInfo (asserted via its signature)
 * @return false when the handle is being executed by another thread, otherwise
 *         the value returned by doBuildResCheck()
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  // try to become the executing thread of this handle
  int64_t self = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // Recovery point: a non-zero value means control came back through a longjmp
  // on this jump buffer; the value is recorded as the query error code.
  int32_t jmpCode = setjmp(pEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // pick the executor that matches the kind of query
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  // report how this round ended
  SQuery* pQuery = pEnv->pQuery;
  if (!IS_QUERY_KILLED(pQInfo)) {
    if (pQuery->rec.rows == 0) {
      qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
    } else {
      qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
             pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    }
  } else {
    qDebug("QInfo:%p query is killed", pQInfo);
  }

  return doBuildResCheck(pQInfo);
}

6858
/**
 * Wait until the result of the current execution round is available.
 *
 * The call blocks on the handle's `ready` semaphore, which doBuildResCheck()
 * posts at the end of every execution round.
 *
 * @param qinfo        query handle
 * @param buildRes     out: set to false on entry, and to true once the
 *                     semaphore has been acquired
 * @param pRspContext  currently unused; kept so the signature stays stable
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL or invalid handle, otherwise
 *         pQInfo->code (also returned right away when the query was killed)
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  (void) pRspContext;

  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  tsem_wait(&pQInfo->ready);
  *buildRes = true;

  return pQInfo->code;
}
6901

6902
/**
 * Build the retrieve response message for the current result set.
 *
 * The message is sized as: the response header, the result data reported by
 * getResultSize(), one int32_t, and one STableIdInfo per entry of
 * pQInfo->arrTableIdInfo. Header fields are written in network byte order.
 *
 * @param qinfo         query handle
 * @param pRsp          out: response buffer obtained from rpcMallocCont()
 * @param contLen       out: total length of the response in bytes
 * @param continueExec  out: false when the query is killed or over, true when
 *                      more results remain to be produced
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL or invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the buffer cannot be allocated,
 *         otherwise pQInfo->code
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;
  SQueryRuntimeEnv *pEnv   = &pQInfo->runtimeEnv;

  // payload = result rows + an int32_t + the table id info entries
  size_t payload = getResultSize(pQInfo, &pQuery->rec.rows);
  payload += sizeof(int32_t);
  payload += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payload + sizeof(SRetrieveTableRsp));

  // TODO: an allocation failure is reported to the caller here, but is not
  // yet propagated to the client in a proper way
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *pMsg = *pRsp;

  pMsg->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query
  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    pMsg->offset = 0;
  } else {
    pMsg->offset = htobe64(pQuery->limit.offset);
  }
  pMsg->useconds = htobe64(pEnv->summary.elapsedTime);

  pMsg->precision = htons(pQuery->precision);

  // copy rows only when there is something to send and no error occurred;
  // otherwise the query is finished
  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, pMsg->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    *continueExec = false;
    pMsg->completed = 1;  // tell the client that no more results will follow
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6955

6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966
/**
 * Tell whether a query has finished.
 *
 * @param qinfo  query handle
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL or invalid handle; otherwise
 *         1 when the query is killed or its status has QUERY_OVER set, else 0
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQInfo->runtimeEnv.pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6967
/**
 * Mark a query as killed and wait until no thread is executing it.
 *
 * The handle owner is cleared as soon as the executing thread stops, so this
 * call polls pQInfo->owner and sleeps between checks until it reads 0.
 *
 * @param qinfo  query handle
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL or invalid handle,
 *         TSDB_CODE_SUCCESS once the handle has no owner
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // wait for the executing thread, if any, to give up the handle
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000
/**
 * Copy one tag value into a result buffer, writing a NULL marker when the tag
 * has no value.
 *
 * @param output  destination inside the result buffer
 * @param val     tag value, or NULL when the tag is not set
 * @param type    data type of the tag
 * @param bytes   number of bytes copied for types other than BINARY/NCHAR;
 *                for BINARY/NCHAR the length is taken from the value itself
 *                via varDataTLen()
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarLen = (type == TSDB_DATA_TYPE_BINARY) || (type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    memcpy(output, val, varDataTLen(val));
  } else {
    // TODO (carried over from the original): stopping here is reported to
    // crash the client
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7001 7002 7003
/**
 * Produce the result rows of a query that touches only tags / table names.
 *
 * The function handles the single table group of the query (zero or one group
 * is asserted) and dispatches on the function id of the first output column:
 *
 *  - TSDB_FUNC_TID_TAG: one row per table, stored as a var-length value whose
 *    payload is: int16_t 0, int64_t uid, int32_t tid, int32_t vgId, followed
 *    by the table name or the requested tag value.
 *  - TSDB_FUNC_COUNT:   a single row holding the number of tables.
 *  - otherwise:         one row per table holding the requested tag values /
 *    table name, honouring limit.offset and limit.limit.
 *
 * pQInfo->tableIndex is advanced for every table consumed, so a later call
 * continues after the last table handled here. On return pQuery->rec.rows
 * holds the number of rows produced and the query status is QUERY_COMPLETED.
 *
 * All scalar fields are serialised with memcpy: the destination is a byte
 * buffer at arbitrary offsets, so storing through casted pointers would be a
 * misaligned access.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // default to the output column definition, but prefer the definition of
    // the matching tag column when there is one
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      // each row is a var-length value occupying exactly rsize bytes
      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      const int16_t zeroField = 0;
      memcpy(output, &zeroField, sizeof(zeroField));
      output += sizeof(int16_t);

      const int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      const int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      const int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    const int64_t numOfTables = (int64_t) num;
    memcpy(pQuery->sdata[0]->data, &numOfTables, sizeof(numOfTables));

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // at most one buffer of rows per call, further capped by the LIMIT clause
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

7134 7135 7136 7137 7138 7139 7140
/**
 * Return the response context currently attached to a query handle.
 *
 * @param qinfo  query handle; must not be NULL (asserted)
 * @return the value of the handle's rspContext field
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;

  assert(pInfo != NULL);
  return pInfo->rspContext;
}

7141 7142 7143 7144 7145 7146 7147
/**
 * Release callback registered with the query handle cache in qOpenQueryMgmt():
 * kills the query stored in a cache entry and then destroys its handle.
 *
 * @param qhandle  pointer to the cached slot that holds the query handle;
 *                 nothing happens when the slot or its content is NULL
 */
void freeqinfoFn(void *qhandle) {
  void** pSlot = (void**) qhandle;

  if (pSlot != NULL && *pSlot != NULL) {
    // the query must be stopped before its handle can be freed
    qKillQuery(*pSlot);
    qDestroyQueryInfo(*pSlot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7152
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7153 7154 7155 7156

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7157
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7158 7159 7160 7161
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7162

S
TD-1530  
Shengliang Guan 已提交
7163
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7164 7165 7166 7167
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7168 7169

  qDebug("vgId:%d, open querymgmt success", vgId);
7170
  return pQueryMgmt;
7171 7172
}

H
Haojun Liao 已提交
7173
/**
 * Callback handed to taosCacheRefresh() by qQueryMgmtNotifyClosed(): kills the
 * query stored in one cache entry.
 *
 * @param handle  pointer to the cached slot that holds the query handle; a NULL
 *                slot is ignored, and a NULL handle inside the slot is rejected
 *                by qKillQuery() itself
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7179 7180 7181 7182 7183 7184 7185
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7186
//  pthread_mutex_lock(&pQueryMgmt->lock);
7187
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7188
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7189

H
Haojun Liao 已提交
7190
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207
}

/**
 * Destroy a query management object: clean up its handle cache, destroy its
 * lock and free the object.
 *
 * The object must already have been marked closed (asserted).
 *
 * @param pQMgmt  query management object; NULL is ignored
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  assert(pQueryMgmt->closed);

  // remembered for the final log line, which runs after the object is freed
  int32_t vgroupId = pQueryMgmt->vgId;

  // detach the cache from the object before cleaning it up
  SCacheObj* pPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  taosTFree(pQueryMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgroupId);
}

7213
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7214
  if (pMgmt == NULL) {
7215
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7216 7217 7218
    return NULL;
  }

7219
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7220

7221 7222
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7223
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7224
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7225 7226 7227
    return NULL;
  }

H
Haojun Liao 已提交
7228
//  pthread_mutex_lock(&pQueryMgmt->lock);
7229
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7230
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7231
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7232
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7233 7234
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7235 7236
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7237
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7238 7239 7240 7241 7242

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7243
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7244 7245 7246 7247 7248 7249
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7250 7251
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7252 7253 7254 7255 7256 7257 7258
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7259
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7260 7261 7262 7263 7264
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7265
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7266 7267 7268
  return 0;
}

7269