qExecutor.c 240.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
37
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
38 39
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

40
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
41
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
42
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
43
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
44

H
Haojun Liao 已提交
45
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
46

47
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
48
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
49

H
Haojun Liao 已提交
50 51
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey members of _src into _dst.
 * Both arguments are expressions with skey/ekey members; each one is evaluated twice,
 * so do not pass expressions with side effects.
 * The expansion deliberately has no trailing semicolon, so that
 * "if (c) TIME_WINDOW_COPY(a, b); else ..." parses as a single statement.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
/* Query status flags. Each value is a distinct bit so that several can be combined with '|'. */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* The query is over.
   * 1. used in one-row result queries, e.g., count/sum/first/last/avg, etc.
   * 2. also set when all data within the queried time window has been processed
   */
  QUERY_COMPLETED = 0x4u,

  /* The result has not been completely returned to the client yet; usually used
   * for interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
77 78

/* Result codes named after the timestamp/tag comparison of a ts join. */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Fault-injection allocators, compiled out by default.
 * When enabled, malloc/calloc/realloc in the rest of this file are redirected to
 * wrappers that return NULL for a pseudo-random subset of the calls.
 */

// fails when rand() is a multiple of 1000, otherwise forwards to malloc()
static UNUSED_FUNC void *u_malloc(size_t size) {
  uint32_t v = rand();

  if (v % 1000 <= 0) {
    return NULL;
  } else {
    return malloc(size);
  }
}

// fails when rand() is a multiple of 1000, otherwise forwards to calloc()
static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t v = rand();
  if (v % 1000 <= 0) {
    return NULL;
  } else {
    return calloc(num, size);
  }
}

// fails when rand() % 5 is 0 or 1, otherwise forwards to realloc()
static UNUSED_FUNC void* u_realloc(void* p, size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return realloc(p, size);
  }
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Move *tw to the next time window in the scan direction of pQuery.
 *
 * For every interval unit except 'n' and 'y' the start key is shifted by
 * sliding * direction factor and the end key becomes skey + interval - 1.
 *
 * For 'n' and 'y' the start key is converted to seconds, broken down with
 * localtime_r(), shifted by `interval` months (interval * 12 for 'y') in the scan
 * direction and rebuilt with mktime(). The end key is one tick before the start of
 * the window that begins `interval` months later.
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to seconds: one division by 1000, and a second one for microsecond precision
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  // start of the next window: shift the month counter in the scan direction
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->skey = mktime(&tm) * 1000L;

  // start of the window after that one; turned into an inclusive end key below
  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = mktime(&tm) * 1000L;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  tw->ekey -= 1;
}

#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))
H
Haojun Liao 已提交
174

175 176
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

H
hjxilinx 已提交
181
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
182
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
183 184
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
187
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
188

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
197

198
/*
 * Check whether the row at position elemPos passes the column filters of pQuery.
 *
 * The filters attached to one column are OR-ed: the column qualifies as soon as one
 * of its filters accepts the value. The columns are AND-ed: the row is rejected as
 * soon as one column does not qualify.
 *
 * @return true if every filtered column qualifies (also when there is no filter column)
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;

    // the NULL state depends on the element only, so it is evaluated once per column
    bool isnull = isNull(pElem, pFilterInfo->info.type);

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      if (isnull) {
        // a NULL value is only accepted by the is-null filter
        if (pFilterElem->fp == isNull_filter) {
          qualified = true;
          break;
        }

        continue;
      }

      if (pFilterElem->fp == notNull_filter) {
        qualified = true;
        break;
      }

      // a non-NULL value never matches the is-null filter
      if (pFilterElem->fp == isNull_filter) {
        continue;
      }

      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output columns of the current query.
 * When the query has a main output (see hasMainOutput()), the ts/tag/tagprj columns
 * are skipped because they do not decide the number of results.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

266 267 268 269 270
/*
 * The number of results needs to be updated because the offset value was updated.
 * Sets numOfRes of every output column to the given value, except for the
 * ts/tag/tagprj/ts_dummy columns. The new value must be smaller than the current one.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

H
Haojun Liao 已提交
286
/* Map a group index to a result id: 20000000 plus 10000 per group. */
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  int32_t base = 20000000;
  return base + (groupIndex * 10000);
}

/*
 * Return true if the group-by expression contains at least one normal column.
 * A NULL expression or one without group columns yields false.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      //make sure the normal column locates at the second position if tbname exists in group by clause
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression.
 * The type is looked up by column id in pQuery->colList. TSDB_DATA_TYPE_NULL is
 * returned when there is no normal group-by column or its id is not in the list.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // placeholder used while no normal group-by column has been found
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/*
 * Return true if the query outputs at least one tag_dummy/ts_dummy column together
 * with at least one function flagged TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

358 359 360 361 362 363 364 365 366 367 368
/*
 * A query is a projection query when every output column is produced by
 * TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ (trivially true without output columns).
 */
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    isProjection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);

    if (!isProjection) {
      return false;
    }

    idx += 1;
  }

  return true;
}

369
/* Return true if the first select expression of the query is TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
370

371 372 373
/*
 * Apply the LIMIT clause to the rows produced in the current round.
 * When total + rows exceeds a positive limit, rows is cut down to the remaining
 * quota and the query status is set to QUERY_COMPLETED.
 *
 * @return true if the row count was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

/* Return true if any output column of the query is a TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
/*
 * Return true if the query needs a tag value in its output: either its single
 * output column is TSDB_FUNC_TS_COMP, or at least one output column is flagged as a tag.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  // a lone ts_comp column requires the tag value for the join filter
  if (pQuery->numOfOutput == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // otherwise look for any output column that is a tag
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

421 422 423 424 425 426 427 428
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
429
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
430
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
431 432
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
433 434
  } else {
    *pColStatis = NULL;
435
  }
436

H
Haojun Liao 已提交
437
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
438 439 440
    return false;
  }

441 442 443
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
444

445 446 447 448
  return true;
}

/*
 * Make the window result registered under the key (pData, bytes) the current one and return it.
 *
 * If the key is already in the hash list, its slot becomes the current index. Otherwise a
 * new slot is appended (master scan only), growing the result array by a factor of 1.5
 * (1.25 once the capacity exceeds 10000) when it is full.
 *
 * Error handling is done with longjmp(pRuntimeEnv->env, ...): TSDB_CODE_QRY_OUT_OF_MEMORY when
 * the array or a result info cannot be allocated, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when the
 * number of windows exceeds MAX_INTERVAL_TIME_WINDOW.
 *
 * @return the window result, or NULL if the key is unknown and this is not the master scan
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // the truncated product does not grow a capacity of 0 or 1; always gain at least one slot,
      // otherwise the slot appended below would lie outside of the array
      if (newCap <= pWindowResInfo->capacity) {
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCap * sizeof(SWindowResult)));
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCap - pWindowResInfo->capacity);

      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SWindowResult *)t;

      // zero the newly added tail before the result infos are created in it
      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The candidate is the window starting at prevSKey when no window is active yet
 * (curIndex == -1), otherwise the window of the current slot. If ts lies outside of the
 * candidate, the window is re-aligned: by truncating ts for the 'n'/'y' units, or by moving
 * the start key in multiples of the sliding value otherwise.
 *
 * The end key is inclusive (start of the following window minus one). For ascending queries
 * it is additionally capped at the end of the query time range.
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // taosTimeAdd() yields the start of the following window, so step back one tick to keep
      // the end key inclusive, exactly as in the re-alignment branch below
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SWindowResult* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // window starts after ts: move backwards by whole sliding steps
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // window ends before ts: move forwards by whole sliding steps
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

/*
 * Assign a position (page id + row id) in the disk-based result buffer to a window result
 * that does not have one yet.
 *
 * The last page registered for the id `sid` is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when there is no page yet) a new page is requested.
 *
 * @return 0 on success or if a position was already assigned, -1 if no page is available
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    if (pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = (int32_t)(pData->num++);

    assert(pWindowRes->pos.pageId >= 0);
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
602
                                       STimeWindow *win, bool masterscan, bool* newWind) {
603 604
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
605

606 607
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
608
  if (pWindowRes == NULL) {
609 610 611
    *newWind = false;

    return masterscan? -1:0;
612
  }
613

614
  *newWind = true;
H
Haojun Liao 已提交
615

616 617 618
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
619
    if (ret != TSDB_CODE_SUCCESS) {
620 621 622
      return -1;
    }
  }
623

624
  // set time window for current result
625
  pWindowRes->win = (*win);
626

H
Haojun Liao 已提交
627
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
628 629 630
  return TSDB_CODE_SUCCESS;
}

631
/* Return the `closed` flag of the window result stored in the given slot. */
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);
  return pWindowResInfo->pResult[slot].closed;
}

H
Haojun Liao 已提交
636
/*
 * Compute how many rows of a block, starting at `pos` and moving in the scan direction,
 * are covered up to the key `ekey`.
 *
 * Ascending: searchFn is run on the rows [pos, numOfRows); its result is the step count,
 * plus one if the row found equals ekey.
 * Descending: searchFn is run on the rows [0, pos]; the step count is pos minus its result,
 * plus one if the row found equals ekey.
 * NOTE(review): the exact contract of searchFn (which neighbour it returns when ekey is not
 * present) is not visible here - confirm against its implementations.
 *
 * @return the number of rows, asserted to be greater than 0
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      // `end` is relative to pos, hence the offset when reading the row
      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
667
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
668
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
669
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
670
    return pWindowResInfo->size;
671
  }
672

673
  // no qualified results exist, abort check
674
  int32_t numOfClosed = 0;
675

676
  if (pWindowResInfo->size == 0) {
677
    return pWindowResInfo->size;
678
  }
679

680
  // query completed
H
hjxilinx 已提交
681 682
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
683
    closeAllTimeWindow(pWindowResInfo);
684

685 686 687 688
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
689
    int64_t skey = TSKEY_INITIAL_VAL;
690

691 692
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
693
      if (pResult->closed) {
694
        numOfClosed += 1;
695 696
        continue;
      }
697

698
      TSKEY ekey = pResult->win.ekey;
699
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
700
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
701 702
        closeTimeWindow(pWindowResInfo, i);
      } else {
703
        skey = pResult->win.skey;
704 705 706
        break;
      }
    }
707

708
    // all windows are closed, set the last one to be the skey
709
    if (skey == TSKEY_INITIAL_VAL) {
710 711 712 713 714
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
715

716
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].win.skey;
717

718 719
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
720
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
721
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
722

723
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
724
    } else {
725
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
726
             numOfClosed);
727 728
    }
  }
729

730 731 732 733 734
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
735

736
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
737
  return numOfClosed;
738 739 740
}

/*
 * Count the rows of the current data block that belong to the time window ending at `ekey`,
 * starting at row startPos and moving in the scan direction.
 *
 * If the window ends inside the block, the count comes from getForwardStepsInBlock();
 * otherwise all remaining rows of the block in the scan direction are counted.
 * When updateLastKey is set, lastKey of the current table is moved one step past the last
 * counted row (or past the block border when the whole remainder is counted).
 *
 * @return the number of rows, asserted to be greater than 0
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num   = -1;
  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

H
Haojun Liao 已提交
780 781
/*
 * Apply every output function of the query to a run of `forwardStep` rows of the current block.
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * For each output column the start timestamp, the row count and the start offset are set
 * (for descending queries the offset is moved back to the first row of the run), selectivity
 * functions additionally receive the matching timestamp list, and the function is executed
 * if functionNeedToExecute() agrees.
 *
 * @param offset      position of the first row of the run in scan direction
 * @param numOfTotal  total number of rows; pre-aggregated values are only used when the
 *                    run covers all of them
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // the flag of the first context is the value restored into every context below
  bool hasPrev = pCtx[0].preAggVals.isSet;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      // NOTE: the original value of isSet have been changed here
      if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }

      // restore it
      pCtx[k].preAggVals.isSet = hasPrev;
    }
  }
}

814
/*
 * Apply every output function of the query to the single row at `offset`, using the
 * per-row entry point (xFunctionF) of each function.
 * Nothing is done unless this is the master scan or the window is closed.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }
  }
}

H
Haojun Liao 已提交
830 831
/*
 * Move *pNext forward to the next time window and find where it starts inside the current data block.
 *
 * Returns -1 when the advanced window lies past the block in scan direction (its skey is greater than the
 * block ekey for an ascending query, its ekey is smaller than the block skey for a descending one).
 * Otherwise the row position of the first candidate row is returned. If that row falls outside the advanced
 * window, *pNext is moved further so that it covers the row.
 *
 * prevPosition is the last row handled by the previous window; it is only used when sliding == interval
 * and it is not -1, in which case the next window simply starts at the neighbouring row.
 *
 * NOTE(review): primaryKeys is dereferenced unconditionally below; callers that may pass a NULL
 * timestamp column must guarantee this function returns at the first check - confirm.
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  if (asc) {
    if (pNext->skey > pDataBlockInfo->window.ekey) {
      return -1;
    }
  } else {
    if (pNext->ekey < pDataBlockInfo->window.skey) {
      return -1;
    }
  }

  // the search key is the leading edge of the window, limited by the start key of the query range
  TSKEY startKey = asc ? pNext->skey : pNext->ekey;
  if (asc) {
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  // tumbling time window query, a special case of sliding time window query
  const bool tumbling = (pQuery->interval.sliding == pQuery->interval.interval);

  int32_t startPos = 0;
  if (tumbling && prevPosition != -1) {
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  TSKEY      rowKey = primaryKeys[startPos];
  const bool missed = asc ? (rowKey > pNext->ekey) : (rowKey < pNext->skey);

  if (missed) {
    const char unit = pQuery->interval.intervalUnit;

    if (unit == 'n' || unit == 'y') {
      // month/year based windows are recomputed from the row timestamp itself
      pNext->skey = taosTimeTruncate(rowKey, &pQuery->interval, pQuery->precision);
      pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else if (asc) {
      // jump forward by a whole number of sliding steps
      pNext->ekey += ((rowKey - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
    } else {
      // jump backward by a whole number of sliding steps
      pNext->skey -= ((pNext->skey - rowKey + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
    }
  }

  return startPos;
}

H
Haojun Liao 已提交
891
/*
 * Pick the trailing key of pWindow in scan direction, limited by the end key of the query range.
 *
 * Ascending query:  pWindow->ekey, but never greater than pQuery->window.ekey.
 * Descending query: pWindow->skey, but never smaller than pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY bound = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > bound) ? bound : pWindow->ekey;
  }

  return (pWindow->skey < bound) ? bound : pWindow->skey;
}

H
hjxilinx 已提交
908 909
/*
 * Return the data pointer of the column whose id equals colId, or NULL when the block holds no such column.
 * todo: the lookup is a linear scan, replace it with a binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
923
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
924 925 926
  if (pDataBlock == NULL) {
    return NULL;
  }
927

H
Haojun Liao 已提交
928
  char *dataBlock = NULL;
H
Haojun Liao 已提交
929
  SQuery *pQuery = pRuntimeEnv->pQuery;
930

931
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
932
  if (functionId == TSDB_FUNC_ARITHM) {
933
    sas->pArithExpr = &pQuery->pSelectExpr[col];
934

935 936 937 938
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
939

H
Haojun Liao 已提交
940
    if (sas->data == NULL) {
H
Haojun Liao 已提交
941
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
942 943 944
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

945
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
946
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
947
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
948
      SColumnInfo *pColMsg = &pQuery->colList[i];
949

950 951 952 953 954 955 956 957
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
958

959
      assert(dataBlock != NULL);
960
      sas->data[i] = dataBlock;  // start from the offset
961
    }
962

963
  } else {  // other type of query function
964
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
965
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
966 967 968 969 970
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
971 972
    } else {
      dataBlock = NULL;
973 974
    }
  }
975

976 977 978 979
  return dataBlock;
}

/**
H
Haojun Liao 已提交
980
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
981 982
 * @param pRuntimeEnv
 * @param forwardStep
983
 * @param tsCols
984 985 986 987 988
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
989
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
990 991
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
992
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
993 994
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

995 996
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
997
  if (pDataBlock != NULL) {
998
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
999
    tsCols = (TSKEY *)(pColInfo->pData);
1000
  }
1001

1002
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1003
  if (sasArray == NULL) {
H
Haojun Liao 已提交
1004
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
1005 1006
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1007

H
Haojun Liao 已提交
1008
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1009
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1010
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1011
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1012
  }
1013

1014
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1015
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1016
    TSKEY ts = TSKEY_INITIAL_VAL;
1017

H
Haojun Liao 已提交
1018 1019 1020 1021 1022 1023 1024 1025
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1026
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1027 1028
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
1029
      taosTFree(sasArray);
H
hjxilinx 已提交
1030
      return;
1031
    }
1032

H
Haojun Liao 已提交
1033 1034 1035
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1036
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1037
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1038
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1039

1040
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1041
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1042
    }
1043

1044 1045
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1046

1047
    while (1) {
H
Haojun Liao 已提交
1048 1049
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1050 1051 1052
      if (startPos < 0) {
        break;
      }
1053

1054
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1055
      hasTimeWindow = false;
H
Haojun Liao 已提交
1056 1057
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1058 1059
        break;
      }
1060

1061 1062 1063 1064 1065
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1066
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1067

1068 1069
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1070
    }
1071

1072 1073 1074 1075 1076 1077 1078
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1079
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1080
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1081 1082 1083 1084 1085
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1086

1087 1088 1089 1090
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1091

S
Shengliang Guan 已提交
1092
    taosTFree(sasArray[i].data);
1093
  }
1094

S
Shengliang Guan 已提交
1095
  taosTFree(sasArray);
1096 1097 1098 1099 1100 1101
}

/*
 * Select (and create on first use) the result slot of the group identified by the group-by column value
 * pData, then point the output buffers of all function contexts to it.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        value of the group-by column in the current row
 * @param type         data type of the group-by column
 * @param bytes        width of the group-by column; used as key length for fixed-width types
 * @return TSDB_CODE_SUCCESS on success; -1 when the value is NULL, when no result slot could be obtained for
 *         the key, or when no result buffer could be attached to a new slot. A float/double column aborts
 *         the query via longjmp(TSDB_CODE_QRY_APP_ERROR).
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // variable-length values are keyed by their payload, fixed-width values by their raw bytes
  char* d = pData;
  int16_t len = bytes;
  if (isVarType) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float/binary/nchar columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true);
  if (pWindowRes == NULL) {
    return -1;
  }

  if (isVarType) {
    /*
     * The slot was looked up by this very key, so a slot that already carries a key copy keeps it.
     * Allocating a fresh copy for every row would leak the previous one.
     * NOTE(review): relies on `key` being NULL for a slot that has not been used yet - confirm that
     * window results are zero-initialized/reset.
     */
    if (pWindowRes->key == NULL) {
      pWindowRes->key = malloc(varDataTLen(pData));
      if (pWindowRes->key == NULL) {
        finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pWindowRes->key, pData);
    }
  } else {
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
      default: break;  // any other type keeps -1
    }

    pWindowRes->win.skey = v;
    pWindowRes->win.ekey = v;
  }

  assert(pRuntimeEnv->windowResInfo.interval == 0);

  // a slot without a page has not been given any result buffer yet
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1156
/*
 * Locate the data of the first non-tag group-by column that is present in the data block.
 *
 * *type and *bytes are overwritten with the schema of every non-tag group-by column that is examined,
 * so on return they describe the last one looked at. NULL is returned when none of those columns is
 * found in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      continue;
    }

    // position of the column in the column list of the query
    int32_t colId = pColIndex->colId;
    int16_t colIndex = -1;

    int32_t c = 0;
    while (c < pQuery->numOfCols) {
      if (pQuery->colList[c].colId == colId) {
        colIndex = c;
        break;
      }

      c += 1;
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

    int32_t b = 0;
    while (b < numOfBlockCols) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColIndex->colId == pColData->info.colId) {
        return pColData->pData;
      }

      b += 1;
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` with the current element of the timestamp buffer.
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tag of the first function context differs from the tag of the
 * buffer element, TS_JOIN_TS_NOT_EQUALS when the row timestamp has not reached the buffer timestamp yet
 * in scan direction, and TS_JOIN_TS_EQUAL when both timestamps match. A row timestamp that is already
 * past the buffer timestamp is treated as a broken invariant (assert).
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, &elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  // the input buffer of the first context is read as an array of timestamps
  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // has the row not caught up with the buffer element yet?
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the given function has to be invoked for the current input.
 *
 * - TSDB_FUNC_TS is always executed.
 * - A function whose result is complete, as well as the TAG_DUMMY/TS_DUMMY placeholders, is never executed.
 * - first/first_dst are executed only in an ascending query.
 * - last/last_dst are executed only when param[0].i64Key of the context equals the query order.
 * - Everything else is executed unless the current scan is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (GET_RES_INFO(pCtx)->complete || isDummy) {
    return false;
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, only the functions handled above need to be executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1263 1264
/*
 * Run the output functions over the current data block one row at a time.
 *
 * For every row, in scan direction:
 *   1. if a timestamp buffer is attached, the row is matched against it (doTSJoinFilter);
 *   2. the column filters are evaluated;
 *   3. interval query: the row is applied to the time window that contains its timestamp and to every
 *      following window that still covers it; otherwise the row is applied to the result slot of its
 *      group-by value (if grouping by a normal column) or directly to the function contexts.
 * Afterwards the lastKey of the current table is set to one step past the last row that was visited.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pStatis         block statistics, forwarded to setExecParams()
 * @param pDataBlockInfo  description of the current block (number of rows, time range, tid)
 * @param pWindowResInfo  time window results; curIndex is restored after the additional windows were visited
 * @param pDataBlock      loaded column data; the first column is taken as timestamp column if it has
 *                        timestamp type
 *
 * NOTE(review): the interval branch reads tsCols[offset] without checking tsCols for NULL, and the
 * assert after the loop requires that at least one row was visited - confirm both hold for all callers.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
  bool    groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyColumnValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind the input data of the block to every function context
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t j = 0;
  int32_t offset = -1;

  for (j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;  // try the next row against the same buffer element
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    // interval window query, decide the time window according to the primary timestamp
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      if (!hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // remember the active window, the loop below moves the cursor to later windows
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      while (1) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        // the row is not covered by this window, hence not by any later one either
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyColumnValue) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
        break;
      }
    }
  }

  // the next scan of this table resumes one step behind the last visited row
  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    item->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array (allocated in getDataBlock)
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }

    taosTFree(sasArray[i].data);
  }

  taosTFree(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1426
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1427
  SQuery *pQuery = pRuntimeEnv->pQuery;
1428

H
hjxilinx 已提交
1429 1430
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1431

H
Haojun Liao 已提交
1432
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1433
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1434
  } else {
1435
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1436
  }
1437

1438
  // update the lastkey of current table
1439
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1440
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1441

1442
  // interval query with limit applied
1443
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1444
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1445 1446
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1447
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1448

1449 1450 1451 1452
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1453

1454 1455 1456
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1457

1458 1459 1460
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1461 1462 1463 1464 1465

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1466
    }
1467
  }
1468

1469
  return numOfRes;
1470 1471
}

H
Haojun Liao 已提交
1472
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1473
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1474

1475 1476
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1477

1478
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1479
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1480
  pCtx->aInputElemBuf = inputData;
1481

1482
  if (tpField != NULL) {
H
Haojun Liao 已提交
1483
    pCtx->preAggVals.isSet  = true;
1484 1485
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1486 1487 1488
  } else {
    pCtx->preAggVals.isSet = false;
  }
1489

H
Haojun Liao 已提交
1490 1491
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1492 1493
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1494

H
Haojun Liao 已提交
1495
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1496 1497
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1498

1499 1500
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1501
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1502
  }
1503

1504 1505 1506 1507 1508
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1509
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1510
    /*
H
Haojun Liao 已提交
1511
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1512 1513 1514 1515 1516 1517 1518 1519 1520 1521
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1522

1523 1524
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1525 1526 1527 1528 1529 1530
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1531 1532
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1533
    pInterpInfo->type = (int8_t)pQuery->fillType;
1534 1535
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1536

1537 1538 1539 1540
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1541 1542 1543
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1544 1545
      }
    }
H
Haojun Liao 已提交
1546 1547 1548
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1549
  }
1550

1551 1552 1553 1554 1555 1556
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1557
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1558 1559 1560
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1561
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1562 1563 1564 1565 1566 1567
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1568
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1569 1570
  SQuery* pQuery = pRuntimeEnv->pQuery;

1571
  if (isSelectivityWithTagsQuery(pQuery)) {
1572
    int32_t num = 0;
1573
    int16_t tagLen = 0;
1574

1575
    SQLFunctionCtx *p = NULL;
1576
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1577 1578 1579
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1580

1581
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1582
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1583

1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1597 1598 1599 1600 1601
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1602
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1603
    }
1604
  }
H
Haojun Liao 已提交
1605 1606

  return TSDB_CODE_SUCCESS;
1607 1608
}

H
Haojun Liao 已提交
1609 1610
/**
 * Assign the intermediate result buffer of every output column.
 *
 * The buffer 'buf' is consumed sequentially: output i receives pSelectExpr[i].interBytes bytes,
 * starting right after the bytes handed to output i - 1.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char   *cursor = buf;
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t interBytes = pQuery->pSelectExpr[idx].interBytes;

    setResultInfoBuf(&pResultInfo[idx], interBytes, isStableQuery, cursor);
    cursor += interBytes;

    idx += 1;
  }
}

1619
/**
 * Allocate and initialize the function contexts (pCtx) and the intermediate result
 * information (resultInfo) of the runtime environment, one entry per output column.
 *
 * For every output column the input/output type information, the scan order, the function id
 * and the function parameters are copied from the select expression into the function context.
 *
 * @param pRuntimeEnv  runtime environment to set up; pRuntimeEnv->pQuery must be valid
 * @param order        order value stored in param[2] of top/bottom/diff function contexts
 * @return TSDB_CODE_SUCCESS on success. On any failure all memory allocated by this function
 *         (including parameter variants already created) is released and
 *         TSDB_CODE_QRY_OUT_OF_MEMORY is returned.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // one allocation: numOfOutput SResultInfo entries, followed by the intermediate result buffers
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the "require null" request is moved from the column flag into the function context
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // the input type/bytes depend on where the input column comes from
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      // user-defined constant column: type information is carried by the first argument
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the timestamp function to be the first output column
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offset of output column i in a result row: sum of the output bytes of all previous columns
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffers start right after the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameter variants that have been created so far. The context array is
  // zero-initialized by calloc, hence numOfParams is 0 for every context not yet processed.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/**
 * Release the resources held by the runtime environment: time window info, function contexts
 * (parameter variants, tag variant, tag context list), result info, fill info, result buffer,
 * both query handles and the timestamp buffer.
 *
 * Nothing is done if the runtime environment has no query attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[col];

      for (int32_t k = 0; k < pFuncCtx->numOfParams; ++k) {
        tVariantDestroy(&pFuncCtx->param[k]);
      }

      tVariantDestroy(&pFuncCtx->tag);
      taosTFree(pFuncCtx->tagInfo.pTagCtxList);
    }

    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  // the destroy functions return the value to store back into the runtime environment
  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1766
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1767

H
Haojun Liao 已提交
1768
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;}
H
hjxilinx 已提交
1769

H
Haojun Liao 已提交
1770 1771 1772
/**
 * Check whether the query produces a fixed number of output rows.
 *
 * Interval queries are never fixed output queries; top/bottom queries and group by normal column
 * queries always are. Otherwise the query is fixed output if at least one output function, apart
 * from the timestamp helper columns, is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[i].base;

    // ignore the ts_comp function: a leading projection of the primary timestamp column
    bool leadingTsProjection = (i == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || tsFunction) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1802
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1803
static bool isPointInterpoQuery(SQuery *pQuery) {
1804
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1805
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1806
    if (functionID == TSDB_FUNC_INTERP) {
1807 1808 1809
      return true;
    }
  }
1810

1811 1812 1813 1814
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1815
static bool isSumAvgRateQuery(SQuery *pQuery) {
1816
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1817
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1818 1819 1820
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1821

1822 1823 1824 1825 1826
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1827

1828 1829 1830
  return false;
}

H
hjxilinx 已提交
1831
// Return true if any output column is produced by the TSDB_FUNC_LAST_ROW function.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    col += 1;
  }

  return false;
}

H
hjxilinx 已提交
1842
/**
 * Check whether a second scan in the reverse order is required.
 *
 * A reverse scan is required if a first/first_dst function is evaluated in a non-ascending query,
 * or if the order stored in the first argument of the first encountered last/last_dst function
 * differs from the order of the query. The ts, ts_dummy and tag columns are skipped.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool helperColumn = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TS_DUMMY) || (funcId == TSDB_FUNC_TAG);
    if (helperColumn) {
      continue;
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST) || (funcId == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (funcId == TSDB_FUNC_LAST) || (funcId == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the decision is made by the first last/last_dst function found
      int32_t requiredOrder = (int32_t)pQuery->pSelectExpr[col].base.arg->argValue.i64;
      return requiredOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1861

H
Haojun Liao 已提交
1862 1863 1864 1865
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1866 1867
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1868 1869 1870
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1871 1872 1873 1874

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1875
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1876 1877 1878
      return false;
    }
  }
1879

H
hjxilinx 已提交
1880 1881 1882
  return true;
}

1883 1884
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1885
/**
 * Compute the time window that contains 'key', aligned to the interval of the query.
 *
 * @param pQuery    query, provides the interval definition and the time precision
 * @param key       timestamp that must be covered by the window
 * @param keyFirst  lower bound of the query range, key >= keyFirst is asserted
 * @param keyLast   upper bound of the query range, key <= keyLast is asserted
 * @param win       [out] the resulting window, both skey and ekey are set
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * if keyFirst > INT64_MAX - pQuery->interval.interval, the query duration between
   * keyFirst and keyLast must be less than one interval (asserted below). Therefore, there is
   * no need to adjust the query range, the window simply ends at INT64_MAX.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  // interval units 'n' and 'y' are added through taosTimeAdd instead of plain arithmetic
  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/**
 * Decide whether pQuery->checkBuffer is set (1) or cleared (0).
 *
 * The flag is cleared for top/bottom queries and group by normal column queries. Otherwise it is
 * set only if every output function, except the ts/ts_dummy columns, is a multi-output function.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[funcId].nStatus);
    if (!allMultiOutput) {
      break;  // one single-output function is sufficient to clear the flag
    }
  }

  if (allMultiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1929
bool colIdCheck(SQuery *pQuery) {
1930 1931
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1932
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1933
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1934 1935 1936
      return false;
    }
  }
1937

1938 1939 1940 1941 1942 1943
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
//
// Return true if every output column, apart from the ts/ts_dummy/tag/tag_dummy helper columns,
// is produced by either 'functId' or 'functIdDst'.
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool helperColumn = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TS_DUMMY) || (funcId == TSDB_FUNC_TAG) ||
                        (funcId == TSDB_FUNC_TAG_DUMMY);

    bool expected = (funcId == functId) || (funcId == functIdDst);

    if (!helperColumn && !expected) {
      return false;
    }
  }

  return true;
}

// true if the query consists of first/first_dst functions only (helper columns are ignored)
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// true if the query consists of last/last_dst functions only (helper columns are ignored)
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1964
// todo refactor, add iterator
1965 1966
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1967
  for(int32_t i = 0; i < t; ++i) {
1968
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1969 1970 1971

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1972
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1973

1974 1975 1976 1977
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1978 1979 1980 1981
    }
  }
}

1982
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
H
Haojun Liao 已提交
1983 1984
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

1985 1986 1987
  // in case of point-interpolation query, use asc order scan
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;
1988

1989 1990
  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
H
Haojun Liao 已提交
1991
  if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
1992
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);
1993

H
Haojun Liao 已提交
1994
    pQuery->order.order = TSDB_ORDER_ASC;
H
Haojun Liao 已提交
1995 1996 1997
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }
1998

1999 2000
    return;
  }
2001

H
Haojun Liao 已提交
2002
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
H
Haojun Liao 已提交
2003
    pQuery->order.order = TSDB_ORDER_ASC;
H
Haojun Liao 已提交
2004 2005 2006
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }
H
Haojun Liao 已提交
2007

2008
    doExchangeTimeWindow(pQInfo, &pQuery->window);
H
Haojun Liao 已提交
2009 2010 2011
    return;
  }

2012
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
H
Haojun Liao 已提交
2013 2014 2015 2016 2017
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, GET_QINFO_ADDR(pQuery), "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }
2018

2019
    pQuery->order.order = TSDB_ORDER_ASC;
2020 2021
    return;
  }
2022

2023
  if (pQuery->interval.interval == 0) {
2024 2025
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
2026
        qDebug(msg, GET_QINFO_ADDR(pQuery), "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
2027 2028
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

2029
        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
2030
        doExchangeTimeWindow(pQInfo, &pQuery->window);
2031
      }
2032

2033
      pQuery->order.order = TSDB_ORDER_ASC;
2034 2035
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
2036
        qDebug(msg, GET_QINFO_ADDR(pQuery), "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
2037 2038
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

2039
        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
2040
        doExchangeTimeWindow(pQInfo, &pQuery->window);
2041
      }
2042

2043
      pQuery->order.order = TSDB_ORDER_DESC;
2044
    }
2045

2046
  } else {  // interval query
2047
    if (stableQuery) {
2048 2049
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
2050
          qDebug(msg, GET_QINFO_ADDR(pQuery), "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
2051 2052
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

2053
          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
2054
          doExchangeTimeWindow(pQInfo, &pQuery->window);
2055
        }
2056

2057
        pQuery->order.order = TSDB_ORDER_ASC;
2058 2059
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
2060
          qDebug(msg, GET_QINFO_ADDR(pQuery), "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
2061 2062
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

2063
          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
2064
          doExchangeTimeWindow(pQInfo, &pQuery->window);
2065
        }
2066

2067
        pQuery->order.order = TSDB_ORDER_DESC;
2068 2069 2070 2071 2072 2073 2074 2075
      }
    }
  }
}

/**
 * Get the initial number of pages:
 *   - group by normal column query: 128
 *   - interval query:               one page per table, but at least 16
 *   - all other queries:            1
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t numOfPages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(numOfPages > 0);
  return numOfPages;
}

2092 2093
/**
 * Compute the row size and the page size of the intermediate result buffer, and store the
 * resulting number of rows per page in pRuntimeEnv->numOfRowsPerPage.
 *
 * @param pRuntimeEnv  runtime environment
 * @param ps           [out] page size in bytes
 * @param rowsize      [out] size of one result row in bytes
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t minRowsPerPage = 4;
  int32_t headerBytes = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // double the default page size until one page, without its header, holds at least minRowsPerPage rows
  int32_t pageSize = DEFAULT_INTERN_BUF_PAGE_SIZE;
  while (((*rowsize) * minRowsPerPage) > pageSize - headerBytes) {
    pageSize = (pageSize << 1u);
  }

  *ps = pageSize;

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2109
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2110

H
Haojun Liao 已提交
2111 2112 2113 2114
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
2115 2116 2117 2118 2119
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
2120

H
Haojun Liao 已提交
2121 2122 2123 2124 2125 2126 2127 2128
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

2129
    // no statistics data, load the true data block
H
Haojun Liao 已提交
2130
    if (index == -1) {
H
Haojun Liao 已提交
2131
      return true;
2132
    }
2133

2134
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
2135
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
H
Haojun Liao 已提交
2136
      return true;
2137
    }
2138

2139
    // all data in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
2140
    if (pDataStatis[index].numOfNull == numOfRows) {
2141 2142 2143 2144 2145 2146 2147 2148 2149

      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
        if (pFilterElem->fp == isNull_filter) {
          return true;
        }
      }

2150 2151
      continue;
    }
2152

H
Haojun Liao 已提交
2153 2154 2155
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
S
TD-1057  
Shengliang Guan 已提交
2156 2157
      float minval = (float)(*(double *)(&pDataBlockst->min));
      float maxval = (float)(*(double *)(&pDataBlockst->max));
2158

2159 2160 2161 2162 2163 2164 2165
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
2166
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
2167 2168 2169 2170 2171
          return true;
        }
      }
    }
  }
2172

H
Haojun Liao 已提交
2173 2174 2175 2176 2177 2178 2179 2180
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
2181

H
Haojun Liao 已提交
2182
  return false;
2183 2184
}

H
Haojun Liao 已提交
2185 2186 2187 2188 2189 2190 2191 2192
/**
 * Check whether the data block is touched by more than one time window of an interval query,
 * i.e. whether a window boundary is located within the time range of the block.
 *
 * The first window is the aligned window that covers the start of the block in scan direction
 * (block skey for an ascending query, block ekey otherwise). Subsequent windows are generated
 * by GET_NEXT_TIMEWINDOW until they are located completely beyond the block.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow win = {0};

  TSKEY rangeStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY rangeEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, rangeStart, rangeEnd, &win);
    assert(win.ekey >= pBlockInfo->window.skey);

    // the first window ends before the block does
    if (win.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &win);
      if (win.skey > pBlockInfo->window.ekey) {
        return false;
      }

      assert(win.ekey > pBlockInfo->window.ekey);
      if (win.skey <= pBlockInfo->window.ekey && win.skey > pBlockInfo->window.skey) {
        return true;
      }
    }
  }

  // descending order: mirror image of the ascending case
  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, rangeStart, rangeEnd, &win);
  assert(win.skey <= pBlockInfo->window.ekey);

  // the first window starts after the block does
  if (win.skey > pBlockInfo->window.skey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &win);
    if (win.ekey < pBlockInfo->window.skey) {
      return false;
    }

    assert(win.skey < pBlockInfo->window.skey);
    if (win.ekey < pBlockInfo->window.ekey && win.ekey >= pBlockInfo->window.skey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2234
/**
 * Load the statistics and/or the data of the current data block, but only as far as required
 * by the query.
 *
 * @param pRuntimeEnv     runtime environment
 * @param pWindowResInfo  time window results, used to set the output buffer for interval queries
 * @param pQueryHandle    handle to retrieve the block statistics/data from
 * @param pBlockInfo      information (time range, number of rows, table id) of the current block
 * @param pStatis         [out] block statistics, set if the statistics have been retrieved
 * @param pDataBlock      [out] block data, set if the block has been loaded
 * @param status          [out] one of BLK_DATA_NO_NEEDED, BLK_DATA_STATIS_NEEDED,
 *                        BLK_DATA_ALL_NEEDED or BLK_DATA_DISCARD
 * @return TSDB_CODE_SUCCESS, or terrno if the data block could not be retrieved
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  // filters and the timestamp buffer always require the block data
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every output function, stop as soon as all data is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // neither statistics nor data are available: report the failure like the branch below does
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    // NOTE(review): a block marked as BLK_DATA_DISCARD is still loaded and counted below.
    // Returning right after the discard would presumably save the load - TODO confirm that no
    // caller relies on *pDataBlock being set for a discarded block.
    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2319
/**
 * Binary search in a list of timestamps. The search relies on the list being sorted in
 * ascending order.
 *
 * @param pValue  buffer that holds 'num' TSKEY values
 * @param num     number of keys in the buffer
 * @param key     the key to search for
 * @param order   TSDB_ORDER_ASC or TSDB_ORDER_DESC (asserted)
 * @return the position of the key if it exists in the list. Otherwise:
 *         - TSDB_ORDER_DESC: the position of the last element that is less than the key,
 *           -1 if the key is less than all elements
 *         - TSDB_ORDER_ASC: the position of the first element that is greater than the key,
 *           -1 if the key is greater than all elements
 *         -1 is also returned if num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  bool    descending = (order == TSDB_ORDER_DESC);

  for (;;) {
    // check the boundaries of the current range first
    if (descending) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }
    } else {
      if (key <= keys[lo]) {
        return lo;
      }

      if (key == keys[hi]) {
        return hi;
      }

      if (key > keys[hi]) {
        return (hi + 1 >= num) ? -1 : (hi + 1);
      }
    }

    // the key is located strictly between keys[lo] and keys[hi], halve the range
    int32_t mid = lo + ((hi - lo + 1) >> 1);

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394
/**
 * Grow every output column buffer of the query so that it can hold `capacity`
 * rows, and point each function context at the start of its (possibly moved)
 * buffer.
 *
 * Nothing is done when `capacity` is smaller than the current capacity.
 * On allocation failure the function does not return: it long-jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * @param pRuntimeEnv  runtime environment owning the query and its contexts
 * @param capacity     requested capacity, in rows
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // do the multiplication in size_t: bytes * capacity may not fit into int32_t
    size_t bufSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], bufSize);
    if (tmp == NULL) {  // todo handle the oom
      // the old buffer is still owned by pQuery->sdata[i], nothing leaks here
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2410 2411 2412
/**
 * Make sure the output buffers can take all rows of the data block that is
 * about to be processed.
 *
 * Only applies when the query is not an interval query, does not group by a
 * normal column, is not a fixed-output query and is not a ts-comp query.
 * When the free room is smaller than the number of rows in the block, every
 * column buffer is enlarged by exactly the missing amount, the newly usable
 * tail is zeroed, and each context output pointer is re-seated right after the
 * rows already produced.
 *
 * On allocation failure the function does not return: it long-jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * @param pRuntimeEnv  runtime environment owning the query and its contexts
 * @param pBlockInfo   information of the block that is going to be scanned
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol && !isFixedOutputQuery(pRuntimeEnv) && !isTSCompQuery(pQuery)) {
    SResultRec *pRec = &pQuery->rec;

    if (pRec->capacity - pRec->rows < pBlockInfo->rows) {
      int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
      int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        int32_t bytes = pQuery->pSelectExpr[i].bytes;
        assert(bytes > 0 && newSize > 0);

        // do the multiplication in size_t: bytes * newSize may not fit into int32_t
        size_t bufSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

        char *tmp = realloc(pQuery->sdata[i], bufSize);
        if (tmp == NULL) {  // todo handle the oom
          // the old buffer is still owned by pQuery->sdata[i], nothing leaks here
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        } else {
          // clear everything behind the rows that have been generated so far
          memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
          pQuery->sdata[i] = (tFilePage *)tmp;
        }

        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

        // these functions also write a timestamp column, which lives in the first output buffer
        int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }

      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
             newSize, pRec->capacity, newSize - pRec->rows);

      pRec->capacity = newSize;
    }
  }
}

2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469
/**
 * Initialise the first time window of an interval query from the first data
 * block that is seen.
 *
 * Does nothing unless the query is an interval query whose window result info
 * still carries TSKEY_INITIAL_VAL as previous start key.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   information of the current data block
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pResInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pResInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    // ascending scan: align on the first timestamp of the block
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
    pResInfo->startTime = win.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);
    pResInfo->startTime = pQuery->window.skey;
  }

  // in both directions the aligned window start becomes the previous start key
  pResInfo->prevSKey = win.skey;
}

2470 2471
/**
 * Iterate over all data blocks delivered by the query handle of the current
 * scan (master or secondary) and apply the query functions on each of them.
 *
 * The loop stops when the handle has no more blocks, when loading a block
 * fails, or when the query status reports a full result buffer or completion.
 * A killed query, or a non-success terrno after the loop, makes the function
 * long-jump to pRuntimeEnv->env instead of returning.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return always 0
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;
  STableQueryInfo *pCurrent = pQuery->current;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  } else {
    pHandle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  for (;;) {
    if (!tsdbNextDataBlock(pHandle)) {
      break;
    }

    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pHandle, &blockInfo, &pStatis, &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped as a whole: move the last key just beyond it
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }

      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // an error recorded in terrno aborts the query
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2550
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2551
  tVariantDestroy(tag);
2552

2553
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2554
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2555
    assert(val != NULL);
2556

H
[td-90]  
Haojun Liao 已提交
2557
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2558
  } else {
2559
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2560 2561 2562 2563
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2564

H
hjxilinx 已提交
2565
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2566
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2567 2568 2569 2570
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2571
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2572
    } else {
H
Haojun Liao 已提交
2573 2574 2575 2576 2577
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2578
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2579
    }
2580
  }
2581 2582
}

2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594
/**
 * Linear lookup of a tag column description by column id.
 *
 * @param pTagColList  array of tag column descriptions, must not be NULL
 * @param numOfTags    number of entries in pTagColList, must be positive
 * @param colId        column id to look for
 * @return pointer to the matching entry, or NULL when no entry has this id
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t idx = 0;
  while (idx < numOfTags) {
    SColumnInfo *pCol = &pTagColList[idx];
    if (pCol->colId == colId) {
      return pCol;
    }

    idx += 1;
  }

  return NULL;
}

2595
/**
 * Load the tag values of a table into the function contexts of the query.
 *
 * - A query consisting of a single TSDB_FUNC_TS_COMP expression only needs the
 *   tag named by the first argument of that expression; it is stored in the
 *   first context.
 * - Otherwise every output expression that refers to a tag column gets its tag
 *   value, and, when a ts buffer exists and the first expression is a TS/PRJ
 *   function on the primary timestamp column, the join tag named by its first
 *   argument is stored in the first context as well.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table whose tag values are read
 * @param tsdb         forwarded to doSetTagValueInParam
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    // NOTE(review): pColInfo is NULL when tagColId is not in tagColList; it is dereferenced unchecked
    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[i];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[i].tag,
                           pExpr->type, pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;

  bool tsOrPrj = (pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ);
  if (!tsOrPrj || pRuntimeEnv->pTSBuf == NULL || pFuncMsg->colInfo.colIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      joinTagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
  SColumnInfo *pJoinColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinTagColId);

  // NOTE(review): same unchecked NULL dereference as in the ts-comp branch
  doSetTagValueInParam(tsdb, pTable, joinTagColId, &pRuntimeEnv->pCtx[0].tag, pJoinColInfo->type, pJoinColInfo->bytes);
  qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pFirstExpr->base.arg->argValue.i64,
      pRuntimeEnv->pCtx[0].tag.i64Key);
}

/**
 * Merge one window result into the output buffers of the function contexts.
 *
 * First pass: every context is pointed at its column of the window result
 * (input side); when mergeFlag is false the output position is advanced by one
 * row and the function is re-initialised, i.e. a new output row is started.
 * For tag functions the tag variant is rebuilt from the input column.
 * Second pass: the distributed-merge routine of every function except
 * TSDB_FUNC_TAG_DUMMY is invoked.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param timestamp    timestamp stored as start timestamp in every context
 * @param pWindowRes   window result that is merged
 * @param mergeFlag    true: merge into the current output row;
 *                     false: start a new output row first
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtxList[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf = pc->aOutputBuf + pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[fid].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes, pPage);

    if (fid != TSDB_FUNC_TAG_DUMMY && fid != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pc->tag);

    int32_t outType = pc->outputType;
    if (outType != TSDB_DATA_TYPE_BINARY && outType != TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    } else {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), outType);
    }
  }

  // all contexts are prepared, now run the merge routines
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (fid != TSDB_FUNC_TAG_DUMMY) {
      aAggs[fid].distMergeFunc(&pCtxList[col]);
    }
  }
}

2682
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2751
/**
 * Debug helper: dump the intermediate result of a super table query to stdout,
 * one line per row and one tab separated field per output column.
 *
 * Columns of a type other than binary, timestamp, bigint, int, float or double
 * are silently skipped.
 *
 * @param pdata        one page per output column, values stored back to back
 * @param pRuntimeEnv  runtime environment of the query
 * @param numOfRows    number of rows to print
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];

      // address of the value of this column in the current row
      char   *pCell = pdata[col]->data + pExpr->bytes * row;
      int32_t type = pExpr->type;

      if (type == TSDB_DATA_TYPE_BINARY) {
        printBinaryData(pExpr->base.functionId, pCell, type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)pCell);
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)pCell);
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)pCell);
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)pCell);
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2786 2787 2788
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2789 2790 2791 2792 2793
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2794

2795 2796
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2797

2798 2799
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2800

2801 2802 2803 2804
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2805

2806 2807 2808 2809
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2810

H
hjxilinx 已提交
2811
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2812
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2813
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2814

H
Haojun Liao 已提交
2815
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2816
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2817

H
hjxilinx 已提交
2818
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2819
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2820
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2821

H
Haojun Liao 已提交
2822
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2823
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2824

2825 2826 2827
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2828

2829 2830 2831
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2832
/**
 * Merge the results of the table groups, starting at pQInfo->groupIndex, until
 * one group produces at least one result or all groups are consumed.
 *
 * The super table query is flagged as over once every group has been handled
 * and the page cursor of the group result has reached the number of data pages.
 * The elapsed time is added to the first-stage merge time of the cost summary.
 *
 * @param pQInfo  query to work on
 * @return TSDB_CODE_SUCCESS, or -1 when merging a group failed
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo *pGroupRes = &pQInfo->groupResInfo;

  bool allGroupsDone = (pQInfo->groupIndex == numOfGroups);
  if (allGroupsDone && pGroupRes->pos.pageId == pGroupRes->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/**
 * Copy merged group results from the disk based result buffer into the output
 * buffers of the query (pQuery->sdata), at most pQuery->rec.capacity rows.
 *
 * When all pages of the current group have been handed out, the next group is
 * merged first; the function returns without copying when that merge fails or
 * when no group is left. The position inside the group result (page id and row
 * id) is advanced so that the next call continues where this one stopped.
 *
 * @param pQInfo  query owning the group result and the result buffer
 * @param pQuery  query whose output buffers are filled; rec.rows must be 0 on entry
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows copied into the output buffers so far

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  for (int32_t j = pGroupResInfo->pos.pageId; (size_t)j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->pos.rowId < pData->num);

    // first row of this page that has not been returned yet; non-zero when the page is resumed
    int32_t startRow = (int32_t)pGroupResInfo->pos.rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    int32_t numOfCopiedRows = 0;
    bool    done = false;

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer gets full: take what fits and remember the row to resume from
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->pos.rowId += numOfCopiedRows;
      done = true;
    } else {
      // the rest of the page fits: only the remaining rows are copied, not the whole page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pos.pageId += 1;
      pGroupResInfo->pos.rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page, rows are `bytes` apart;
      // skip the rows of this page that were already returned by a previous call
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2940
/**
 * Number of result rows held by a window result.
 *
 * The count is taken from the first output column that is able to decide it
 * and that reports more than zero results.
 *
 * @param pQuery      query describing the output columns
 * @param pWindowRes  window result to inspect
 * @return number of results of the first deciding column with results, 0 when
 *         there is none
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool decidesCount = (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG && fid != TSDB_FUNC_TAGPRJ);
    if (!decidesCount) {
      continue;
    }

    SResultInfo *pInfo = &pWindowRes->resultInfo[col];
    assert(pInfo != NULL);

    if (pInfo->numOfRes > 0) {
      return pInfo->numOfRes;
    }
  }

  return 0;
}

2963
/**
 * Merge the window results of all tables of one group into the group result
 * that is kept in the disk based result buffer.
 *
 * Tables without result pages or without window results are ignored. With a
 * single contributing table nothing is merged: the pages of that table are
 * exposed directly as group result. Otherwise a loser tree delivers the window
 * results in timestamp order; results sharing a timestamp are merged into one
 * output row, and the output buffer is flushed to the result buffer whenever it
 * is full and once more at the end.
 *
 * All temporary allocations are released on every way out of the function.
 *
 * @param pQInfo  query to work on
 * @param pGroup  array of STableQueryInfo pointers forming the group
 * @return number of data pages of the group result (0 when the group has no
 *         data), or -1 when flushing to the result buffer failed.
 *         The function long-jumps to the runtime environment instead of
 *         returning when memory runs out or the query has been killed.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    taosTFree(posList);
    taosTFree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that actually own results
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    taosTFree(posList);
    taosTFree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  int32_t result = 0;                      // value returned to the caller
  int32_t jmpCode = TSDB_CODE_SUCCESS;     // error code for longjmp, TSDB_CODE_SUCCESS means "return normally"

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  char* buf = calloc(1, pRuntimeEnv->interBufSize);

  // pTree is still NULL when the loser tree could not be built
  if (pTree == NULL || pResultInfo == NULL || buf == NULL) {
    jmpCode = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto cleanup;
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      jmpCode = TSDB_CODE_TSC_QUERY_CANCELLED;
      goto cleanup;
    }

    // the root of the loser tree names the source with the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window result: just step over it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

            result = -1;
            goto cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

      result = -1;
      goto cleanup;
    }
  }

  {
    int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

    qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);
  }

  result = pQInfo->groupResInfo.numOfDataPages;

cleanup:
  // single release point for everything allocated in this function
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);

  taosTFree(pResultInfo);
  taosTFree(buf);

  if (jmpCode != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, jmpCode);
  }

  return result;
}

H
Haojun Liao 已提交
3147 3148
/**
 * Copy the rows held in pQuery->sdata[] into newly obtained pages of the result buffer.
 *
 * The row count is read from pQuery->sdata[0]->num and written out in chunks of at most
 * pResultBuf->numOfRowsPerPage rows. Every chunk goes to a page returned by getNewDataBuf()
 * for pGroupResInfo->groupId, and pGroupResInfo->numOfDataPages is incremented once per page.
 *
 * @param pRuntimeEnv    runtime environment providing the query, function contexts and result buffer
 * @param pGroupResInfo  supplies the group id and receives the page count
 * @return TSDB_CODE_SUCCESS (this routine returns no other value)
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery              *pQuery     = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId      = -1;  // output argument of getNewDataBuf()
  int32_t rowsPerPage = pResultBuf->numOfRowsPerPage;
  int32_t numOfRows   = (int32_t) pQuery->sdata[0]->num;

  for (int32_t start = 0; start < numOfRows; ) {
    int32_t left = numOfRows - start;
    int32_t rows = (left < rowsPerPage)? left:rowsPerPage;
    assert(rows > 0);

    // obtain the destination page for this chunk
    tFilePage *pPage = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    pPage->num = rows;

    // copy the chunk column by column; each column starts at offset[col] * numOfRowsPerPage in the page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pPage->data + pRuntimeEnv->offset[col] * pRuntimeEnv->numOfRowsPerPage;
      char *src = ((char *) pQuery->sdata[col]->data) + start * bytes;
      memcpy(dst, src, (size_t)(pPage->num * bytes));
    }

    start += rows;
    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/**
 * Re-point every function context at the start of the merge output buffer and mark the
 * buffer as empty.
 *
 * @param pQuery       query whose sdata[] pages serve as the merge output buffer
 * @param pCtx         array of pQuery->numOfOutput function contexts
 * @param pResultInfo  array of pQuery->numOfOutput result-info slots, one bound to each context
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pCtx[col];

    // the output pointer is placed one element BEFORE the column data
    // NOTE(review): presumably the merge routine advances it before the first write -- confirm
    pColCtx->aOutputBuf  = pQuery->sdata[col]->data - pColCtx->outputBytes;
    pColCtx->size        = 1;
    pColCtx->startOffset = 0;
    pColCtx->resultInfo  = &pResultInfo[col];

    // no rows are buffered any more
    pQuery->sdata[col]->num = 0;
  }
}

3196 3197 3198 3199
/**
 * Flip the per-table scan state so that the table can be scanned in the opposite direction.
 *
 * The window end is first moved to lastKey + step (unless lastKey still equals win.skey),
 * then skey/ekey are swapped, lastKey restarts from the new skey, the cursor order is switched,
 * the cursor vgroupIndex is reset to -1 and the window-result index is set to the last entry.
 *
 * @param pQuery           query whose order.order supplies the direction factor
 * @param pTableQueryInfo  table state to update; NULL is ignored
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // pQuery->order.order has been switched by the caller already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO: validate the relation between win.ekey and lastKey + step (the original assertion is disabled)

  // lastKey == win.skey means nothing was produced, so the window is left untouched
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // start from the last time window
  SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
  pWindowResInfo->curIndex = pWindowResInfo->size - 1;
}

/**
 * For every closed window result, decide per output column whether the function still has
 * work to do in the reverse scan by setting resultInfo[j].complete.
 *
 *  - FIRST/FIRST_DST with ascending order, or LAST/LAST_DST with descending order: complete = false
 *  - TS and TAG columns: left unchanged
 *  - everything else: complete = true
 *
 * @param pQInfo          query info; only its runtimeEnv.pQuery is read
 * @param pWindowResInfo  window results to walk
 * @param order           order value compared against TSDB_ORDER_ASC / TSDB_ORDER_DESC
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    // windows that are not closed are skipped
    if (!getTimeWindowResStatus(pWindowResInfo, i)) {
      continue;
    }

    SWindowResult *pWindowRes = getWindowResult(pWindowResInfo, i);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functId = pQuery->pSelectExpr[j].base.functionId;

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pWindowRes->resultInfo[j].complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pWindowRes->resultInfo[j].complete = true;
      }
    }
  }
}

3252 3253
/**
 * Mark which output functions still need to run during the reverse scan.
 *
 * For group-by-normal-column and interval queries the decision is delegated to
 * disableFuncInReverseScanImpl() on the runtime window results. Otherwise the same rule is
 * applied directly to each function context's resultInfo (contexts without a resultInfo are skipped).
 *
 * @param pQInfo  query info owning the runtime environment
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple result of a table query  (todo refactor)
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = pRuntimeEnv->pCtx[j].resultInfo;
    if (pResInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    int32_t functId = pQuery->pSelectExpr[j].base.functionId;

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pResInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pResInfo->complete = true;
    }
  }
}

/**
 * Prepare every table of every table group for the reverse scan.
 *
 * Each STableQueryInfo is flipped with updateTableQueryInfoForReverseScan(), and the resulting
 * lastKey is copied into the entry at the same position of pQInfo->tableGroupInfo.pGroupList.
 *
 * @param pQInfo  query info owning the table groups
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for(int32_t groupIdx = 0; groupIdx < numOfGroups; ++groupIdx) {
    SArray *pTableList = GET_TABLEGROUP(pQInfo, groupIdx);
    SArray *pKeyList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, groupIdx);

    size_t numOfTables = taosArrayGetSize(pTableList);
    for (int32_t tableIdx = 0; tableIdx < numOfTables; ++tableIdx) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(pTableList, tableIdx);
      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pTableKeyInfo = taosArrayGet(pKeyList, tableIdx);
      pTableKeyInfo->lastKey = pCheckInfo->lastKey;

      // both lists must describe the same table at the same position
      assert(pCheckInfo->pTable == pTableKeyInfo->pTable);
    }
  }
}

3303
/**
 * Apply SWITCH_ORDER to the order field of every output function context.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t idx = 0; idx < numOfOutput; ++idx) {
    SWITCH_ORDER(pCtxList[idx].order);
  }
}

H
Haojun Liao 已提交
3310
/**
 * Allocate and initialize the result-info array of one window result.
 *
 * A single zeroed allocation holds pQuery->numOfOutput SResultInfo entries followed by
 * interBufSize bytes; the trailing area is handed to setWindowResultInfo() as the
 * intermediate result buffer. The page position of the row is reset to {-1, -1}.
 *
 * @param pQuery         query providing the number of output columns
 * @param pResultRow     window result that receives the allocation
 * @param isSTableQuery  forwarded to setWindowResultInfo()
 * @param interBufSize   number of extra bytes appended after the SResultInfo array
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when the allocation fails
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer starts right behind the SResultInfo array
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/**
 * Rewind every function context to the start of its output column, reset the runtime
 * result info, zero the output column memory and re-run initCtxOutputBuf().
 *
 * @param pRuntimeEnv  runtime environment owning the contexts and result infos
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    pColCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * bind the (reset) runtime result info to the context;
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(&pRuntimeEnv->resultInfo[col]);
    pColCtx->resultInfo = &pRuntimeEnv->resultInfo[col];

    // top/bottom/diff write their timestamps into the buffer of the first output column
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool usesTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (usesTsBuf) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // clear the whole output column
    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/**
 * Advance the output positions of all function contexts by `output` rows and reset their
 * result info.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 * @param output       number of rows to move forward
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);  // diff function is handled in multi-output function

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pColCtx->aOutputBuf += pColCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine; the output buffer used there is ptsOutputBuf,
     * so it has to be forwarded as well.
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pColCtx->ptsOutputBuf = (char*)pColCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pColCtx->resultInfo);
  }
}

/**
 * Reset currentStage of every function context to 0 and call the function's init routine
 * for every context whose result info is not flagged as initialized.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pColCtx->currentStage = 0;

    // contexts that are already initialized keep their state
    SResultInfo* pResInfo = GET_RES_INFO(pColCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pColCtx);
    }
  }
}

3398
/**
 * Apply the remaining OFFSET of the query to the rows currently held in the output buffer.
 *
 * Nothing happens when the buffer is empty or the offset is already 0. Otherwise:
 *  - if the buffer holds no more rows than the offset, all rows are dropped, the offset is
 *    reduced by that amount, the output buffers are reset and QUERY_RESBUF_FULL is cleared;
 *  - else the first `offset` rows of every output column are removed by shifting the
 *    remaining rows to the front, the offset becomes 0 and the context output pointers are
 *    placed right behind the remaining rows.
 *
 * @param pRuntimeEnv  runtime environment owning the query and the function contexts
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // both logged values are signed 64-bit quantities, so PRId64 is used for each of them
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
  } else {
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    // rec.rows is logged with PRId64 here as well, consistent with the branch above
    qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, pQuery->rec.rows);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      // source and destination overlap, hence memmove
      memmove(pQuery->sdata[i]->data, (char*)pQuery->sdata[i]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
      pRuntimeEnv->pCtx[i].aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

      // diff/top/bottom take their timestamp output position from the first output column
      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
  }
}

/**
 * Update the status flags of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed into the status
 * after the QUERY_NOT_COMPLETED flag has been cleared.
 *
 * @param pQuery  query to update
 * @param status  status flag(s) to set
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status == QUERY_NOT_COMPLETED) {
    pQuery->status = status;
    return;
  }

  // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
  CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
  pQuery->status |= status;
}

/**
 * Run the xNextStep hook of every output function (TS columns excluded) and report whether
 * at least one of them is not complete afterwards.
 *
 * For group-by-normal-column and interval queries this is done for every closed window
 * result, with the contexts bound to that window first; otherwise it is done once on the
 * current contexts.
 *
 * @param pRuntimeEnv  runtime environment owning the contexts and window results
 * @return true when at least one function result is not complete
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    scanAgain = false;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

    for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pWindowRes = getWindowResult(pWindowResInfo, i);
      if (!pWindowRes->closed) {
        continue;
      }

      // bind the function contexts to this window before stepping the functions
      setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        int16_t functId = pQuery->pSelectExpr[j].base.functionId;
        if (functId == TSDB_FUNC_TS) {
          continue;
        }

        aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]);
        SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

        if (!pResInfo->complete) {
          scanAgain = true;
        }
      }
    }

    return scanAgain;
  }

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int16_t functId = pQuery->pSelectExpr[j].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]);
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    if (!pResInfo->complete) {
      scanAgain = true;
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3494
/**
 * Take a snapshot of the query state before a scan starts.
 *
 * The snapshot holds the query status, the current window-result index, `start` as lastKey,
 * a copy of pQuery->window (w) and a copy of the current table's window whose skey is
 * replaced by `start` (curWindow).
 *
 * @param pRuntimeEnv  runtime environment owning the query
 * @param start        start key of the scan; asserted to be on the proper side of the
 *                     current table's lastKey for the scan direction
 * @return the snapshot, by value
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pTableQueryInfo->lastKey)
                                    : (start >= pTableQueryInfo->lastKey));

  SQueryStatusInfo snapshot = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
  };

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pTableQueryInfo->win);

  // the window to (re)scan begins at the requested start key
  snapshot.curWindow.skey = start;

  return snapshot;
}

3515 3516 3517 3518
/**
 * Switch the runtime environment into reverse-scan mode.
 *
 * Saves the ts-buffer cursor in pStatus, reverses the query window and order, flags the
 * environment as REVERSE_SCAN, prepares the function contexts and per-table ranges, and
 * opens a new secondary tsdb query handle for the reversed window (any previous secondary
 * handle is cleaned up first). When the handle cannot be created, control leaves through
 * longjmp(pRuntimeEnv->env, terrno).
 *
 * @param pRuntimeEnv  runtime environment to switch
 * @param pStatus      snapshot taken before the scan; its curWindow is the range to reverse
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // remember the ts-buffer cursor, then turn the buffer around and step to the next position
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // the reverse scan covers the range of the previous scan, with start and end exchanged
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // sanity check: the window must be consistent with the new order
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // release the handle of an earlier secondary scan, if there is one
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3564 3565
/**
 * Undo setEnvBeforeReverseScan(): switch query and context order back, restore the ts-buffer
 * cursor, flag the environment as MASTER_SCAN and restore lastKey, status and the time
 * windows from the snapshot.
 *
 * @param pRuntimeEnv  runtime environment to restore
 * @param pStatus      snapshot holding the values to restore
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  // put the ts-buffer cursor back and align its order with the query order
  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the state saved before the reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3586 3587 3588 3589 3590 3591 3592
/**
 * Reset the lastKey of the only table in the group info to the start of the query
 * condition's time window. The group info is asserted to contain exactly one table.
 *
 * @param pTableGroupInfo  table group info holding a single table
 * @param pCond            query condition whose twindow.skey becomes the new lastKey
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstKey = taosArrayGet(pFirstGroup, 0);

  pFirstKey->lastKey = pCond->twindow.skey;
}

3593
/**
 * Scan the data blocks of the current table, repeating the scan while some function still
 * requires it, and finish with a reverse scan when needReverseScan() asks for one.
 *
 * Repeated scans use a new secondary tsdb query handle over the range recorded during the
 * master scan. If that handle cannot be created, or the query has been killed, control
 * leaves through longjmp(pRuntimeEnv->env, ...).
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key of the scan
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;

      // the scanned range ends right before lastKey; untouched if no qualified data blocks were found
      if (saved.lastKey != pTableQueryInfo->lastKey) {
        saved.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      saved.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, saved.curWindow);

    // drop the handle of the previous repeat scan before opening a new one
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (!needReverseScan(pQuery)) {
    return;
  }

  setEnvBeforeReverseScan(pRuntimeEnv, &saved);

  // reverse scan from current position
  qDebug("QInfo:%p start to reverse scan", pQInfo);
  doScanAllDataBlocks(pRuntimeEnv);

  clearEnvAfterReverseScan(pRuntimeEnv, &saved);
}

H
hjxilinx 已提交
3674
/**
 * Call the xFinalize hook of every output function.
 *
 * For group-by-normal-column and interval queries this is done per closed window result
 * (all windows are closed first in the group-by case), and the number of rows of each
 * window is stored in its numOfRows. Otherwise the hooks run once on the current contexts.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    SWindowResult *pWindowRes = &pWindowResInfo->pResult[i];
    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWindowRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/**
 * Tell whether the query has at least one output column whose function is something other
 * than TSDB_FUNC_TS, TSDB_FUNC_TAG or TSDB_FUNC_TAGPRJ.
 *
 * @param pQuery  query to inspect
 * @return true when such a column exists, false otherwise
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3722
/**
 * Initialize a STableQueryInfo inside caller-provided memory.
 *
 * The window, lastKey (= win.skey), table pointer and cursor vgroupIndex (= -1) are set.
 * For interval and group-by-normal-column queries the window result info is initialized
 * too; other aggregate queries leave it untouched.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table object stored in the info
 * @param win          query time window of the table
 * @param buf          memory that holds the STableQueryInfo
 * @return `buf` as STableQueryInfo*, or NULL when initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;

  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  // in other aggregate query, do not initialize the windowResInfo
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol) {
    return pInfo;
  }

  // set more initial size of interval/groupby query
  int32_t initialSize = 16;
  int32_t initialThreshold = 100;
  int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  if (code != TSDB_CODE_SUCCESS) {
    return NULL;
  }

  return pInfo;
}

H
Haojun Liao 已提交
3747
/**
 * Clean up the window result info embedded in a STableQueryInfo.
 *
 * @param pTableQueryInfo  info to clean up; NULL is ignored
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3758
 * @param pDataBlockInfo
3759
 */
H
Haojun Liao 已提交
3760
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3761
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3762 3763 3764
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3765 3766
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3767 3768 3769 3770

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3771

H
Haojun Liao 已提交
3772 3773 3774
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3775

3776 3777
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3778 3779 3780
  if (pWindowRes == NULL) {
    return;
  }
3781

3782 3783 3784 3785 3786
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3787
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3788 3789 3790 3791
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3792

H
Haojun Liao 已提交
3793 3794
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3795 3796 3797 3798
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3799
/**
 * Point every function context at the output position and result info of one window result.
 *
 * @param pRuntimeEnv  runtime environment owning the contexts and the result buffer
 * @param pResult      window result to bind; its pos.pageId selects the buffer page
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    // top/bottom/diff take their timestamp output position from the first output column
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool usesTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (usesTsBuf) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pColCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pColCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3826 3827
/**
 * Bind every function context to one window result and initialize the functions that
 * have not been initialized for it yet.
 *
 * Contexts whose result info is both initialized and complete only get their resultInfo
 * pointer updated; everything else is skipped for them.
 *
 * @param pRuntimeEnv  runtime environment owning the contexts and the result buffer
 * @param pResult      window result to bind; its pos.pageId selects the buffer page
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];

    pColCtx->resultInfo = &pResult->resultInfo[col];
    if (pColCtx->resultInfo->initialized && pColCtx->resultInfo->complete) {
      continue;
    }

    pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
    pColCtx->currentStage = 0;

    // top/bottom/diff take their timestamp output position from the first output column
    int32_t functionId = pColCtx->functionId;
    bool usesTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (usesTsBuf) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pColCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;     // set super table query flag

    if (!pColCtx->resultInfo->initialized) {
      aAggs[functionId].init(pColCtx);
    }
  }
}

3860
/**
 * Set the tag values for the given table and, when a ts buffer is in use, position the
 * ts-buffer cursor for that table.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the start position is looked up by
 * tag and the resulting cursor is stored in the table info; on later visits the stored
 * cursor is restored.
 *
 * @param pQInfo           query info owning the runtime environment
 * @param pTable           table object passed to setTagVal()
 * @param pTableQueryInfo  per-table state holding the tag and the saved cursor
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  tVariantAssign(&pTableQueryInfo->tag, &pRuntimeEnv->pCtx[0].tag);
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pTableQueryInfo->tag);

  // keep the cursor info of current meter
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3890
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3891 3892
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3893
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3894

3895 3896 3897
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3898
    pTableQueryInfo->win.skey = key;
3899
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3900

3901 3902 3903 3904 3905
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3906

3907 3908 3909 3910 3911 3912
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3913
    STimeWindow     w = TSWINDOW_INITIALIZER;
3914
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3915

H
Haojun Liao 已提交
3916 3917
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3918
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3919
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3920

3921 3922
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3923
        assert(win.ekey == pQuery->window.ekey);
3924
      }
3925

3926
      pWindowResInfo->prevSKey = w.skey;
3927
    }
3928

3929
    pTableQueryInfo->queryRangeSet = 1;
3930
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3931 3932 3933 3934
  }
}

/*
 * Check whether any output expression of the query uses a function whose status flags in aAggs
 * contain TSDB_FUNCSTATE_NEED_TS.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true if at least one function carries the flag, false otherwise
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) {
      return true;
    }

    idx++;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * The column is required when
 *   1. the progress key (lastKey) of the current table lies inside the block's time window, or
 *   2. the end key of the query window lies inside the block's time window, or
 *   3. one of the query functions needs timestamps (see requireTimestamp()).
 *
 * @param pQuery          query being executed; pQuery->current is the table being scanned
 * @param pDataBlockInfo  description of the data block, only its time window is read
 * @return true if the timestamp column must be loaded
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pTableInfo = pQuery->current;

  // 1. the scan position of the table is located in this block
  if (pTableInfo->lastKey >= pBlockWin->skey && pTableInfo->lastKey <= pBlockWin->ekey) {
    return true;
  }

  // 2. the query window ends in this block
  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  // 3. a function needs the timestamps in any case
  return requireTimestamp(pQuery);
}

3958
/*
 * Copy the rows of closed window results into the output buffers (pQuery->sdata) of the query.
 *
 * Copying starts at the window selected by pQInfo->groupIndex (counted from the first closed window
 * for ascending order, from the last one otherwise) and stops when all closed windows are consumed
 * or the output buffer holds pQuery->rec.capacity rows. If a window does not fit completely, only a
 * part of it is copied and the resume position is kept in pQInfo->groupResInfo.pos.rowId, so that a
 * following call continues with the remaining rows of that window.
 *
 * @param pQInfo       query info; groupIndex and groupResInfo.pos.rowId are updated
 * @param pResultInfo  window results to copy from
 * @param orderType    TSDB_ORDER_ASC to walk the windows forwards, anything else walks backwards
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  // choose the first window and the walking direction
  int32_t step = 1;
  int32_t idx = pQInfo->groupIndex;
  if (orderType != TSDB_ORDER_ASC) {
    step = -1;
    idx = numOfClosed - pQInfo->groupIndex - 1;
  }

  SGroupResInfo *pGroupRes = &pQInfo->groupResInfo;
  int32_t        numOfResult = 0;

  for (; (idx < numOfClosed) && (idx >= 0); idx += step) {
    SWindowResult *pWin = &pWindows[idx];

    // empty window: nothing to copy, move on to the next one
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupRes->pos.rowId = 0;
      continue;
    }

    int32_t numOfRowsToCopy = pWin->numOfRows - pGroupRes->pos.rowId;
    int32_t firstRow = pGroupRes->pos.rowId;

    if (numOfRowsToCopy <= pQuery->rec.capacity - numOfResult) {
      // the rest of this window fits: the window is finished
      pGroupRes->pos.rowId = 0;
      pQInfo->groupIndex += 1;
    } else {
      // the remaining output space is too small: copy what fits and remember where to resume
      numOfRowsToCopy = (int32_t) pQuery->rec.capacity - numOfResult;
      pGroupRes->pos.rowId += numOfRowsToCopy;
    }

    tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t size = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + numOfResult * size;
      char *src = getPosInResultPage(pRuntimeEnv, col, pWin, pPage);
      memcpy(dst, src + firstRow * size, size * numOfRowsToCopy);
    }

    numOfResult += numOfRowsToCopy;

    // output buffer is full
    if (numOfResult == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, numOfResult);
#endif
  return numOfResult;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4035
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4036
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4037

4038
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4039
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4040

4041
  pQuery->rec.rows += numOfResult;
4042

4043
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4044 4045
}

H
Haojun Liao 已提交
4046
/*
 * For queries that are not interval queries, raise numOfRows of every window result of the runtime
 * environment to the largest numOfRes reported by its output functions. The TS, TAG and TAGPRJ
 * functions are not taken into account. Interval queries are left untouched.
 *
 * @param pRuntimeEnv  runtime environment whose windowResInfo is updated
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t w = 0; w < pRuntimeEnv->windowResInfo.size; ++w) {
    SWindowResult *pWin = &pRuntimeEnv->windowResInfo.pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool skipped = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
      if (!skipped) {
        pWin->numOfRows = (uint16_t)(MAX(pWin->numOfRows, pWin->resultInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4068
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4069
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4070
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4071
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4072

4073
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4074
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4075

H
Haojun Liao 已提交
4076
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4077
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4078
  } else {
4079
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4080 4081 4082
  }
}

H
Haojun Liao 已提交
4083
/*
 * Check whether a table query still has results that were not returned to the client.
 *
 * - If a positive limit is set and already reached, nothing remains.
 * - For fill queries (except point interpolation queries): results remain if the fill info still
 *   holds rows, or, once the query status contains QUERY_COMPLETED, if filling up to the end of the
 *   query window would still produce rows. While the query is not completed, the gap between two
 *   result blocks can only be filled after the next data block is retrieved, so nothing is
 *   reported as remaining.
 * - For all other queries: results remain if the query is completed, it is a group-by-normal-column
 *   or interval query, and the window result info of the runtime environment is not empty.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return true if there are remaining results
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // the limit is reached already
  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // rows kept in the fill info must be filled and returned first
    int32_t remain = taosNumOfRemainRows(pFillInfo);
    if (remain > 0) {
      return true;
    }

    // query is not completed: the next data block is required before more rows can be filled
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      return false;
    }

    // query is completed: check if rows are generated while filling up to the end of the window
    int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
    return numOfTotal > 0;
  }

  // window results of a completed query are waiting to be returned to the client
  return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
         (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
         (pRuntimeEnv->windowResInfo.size > 0);
}

/*
 * Serialize the current result set into the response payload and update the query status.
 *
 * Written to data, in this order:
 *   1. for each output column, numOfRows values taken from pQuery->sdata (column by column);
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer converted with htonl();
 *   3. one STableIdInfo per entry, with uid and key converted with htobe64() and tid with htonl().
 *
 * The write cursor is advanced by arbitrary byte counts (bytes * numOfRows per column), so it is
 * in general not aligned for int32_t or STableIdInfo. All values after the column data are
 * therefore staged in properly typed locals and copied out with memcpy() instead of being stored
 * through a cast pointer. Bytes of a serialized STableIdInfo that do not belong to uid, tid or key
 * (e.g. padding, if the type has any) are written as zero.
 *
 * Afterwards, if the query status contains QUERY_COMPLETED, the status is switched to QUERY_OVER
 * when nothing remains to be returned: IS_STASBLE_QUERY_OVER() for super table queries,
 * !queryHasRemainResForTableQuery() for table queries.
 *
 * @param pQInfo     query info
 * @param numOfRows  number of result rows to copy for every output column
 * @param data       destination buffer; the caller must provide enough space for all three parts
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // 1. column data
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  // 2. number of table id entries
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(int32_t));
  data += sizeof(int32_t);

  // 3. table id entries
  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo item;
    memset(&item, 0, sizeof(item));
    item.uid = htobe64(pSrc->uid);
    item.tid = htonl(pSrc->tid);
    item.key = htobe64(pSrc->key);

    memcpy(data, &item, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4160
/*
 * Generate filled result rows into the output buffers of the query and apply the pending offset.
 *
 * Filled blocks are generated repeatedly:
 *   - offset is 0: the generated rows are returned as they are;
 *   - offset is smaller than the number of generated rows: the first offset rows of every column
 *     in pDst are dropped by moving the remaining rows to the front, offset is reset to 0 and the
 *     number of remaining rows is returned;
 *   - otherwise the whole block is discarded, offset is reduced by its size, pQuery->rec.rows is
 *     reset to 0, and the loop continues as long as queryHasRemainResForTableQuery() reports more
 *     results; when there are none, 0 is returned.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         output pages, one per output column; rows are moved inside these pages
 * @param numOfFilled  not used by this function
 * @return number of rows available in the output buffers
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset left: everything generated belongs to the result
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    // the offset ends inside this block: drop the leading rows and return the rest
    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= (int32_t)pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // the whole block is consumed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return 0;
    }
  }
}

4205
/*
 * Write the cost summary of a query to the debug log.
 *
 * The first stage merge time is added to the elapsed time of the summary before it is printed,
 * i.e. the summary stored in the runtime environment is modified by this call.
 *
 * @param pQInfo  query info whose runtime summary is reported
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // the merge time is part of the total elapsed time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%" PRId64 " us, first merge:%" PRId64 " us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%" PRId64 ", check rows:%" PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: internal size:%" PRId64 "B, numOfWin:%" PRId64, pQInfo, pCost->internalSupSize,
         pCost->numOfTimeWindows);
}

4221 4222
/*
 * Consume the remaining offset of the query inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped: lastKey of the
 * current table is moved one step past the block and the offset is reset to 0.
 * Otherwise pQuery->pos is set to the first row behind the skipped rows (counted from the end of
 * the block for descending queries), the data block is retrieved, lastKey is set to the timestamp
 * of that row (taken from column 0 of the block), the offset is reset to 0 and the query functions
 * are applied on the block.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   description of the current data block
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset covers exactly this block: skip it completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pTableInfo->lastKey = edge + step;
    pQuery->limit.offset = 0;
    return;
  }

  // position on the first row that is not skipped
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // column 0 is read as the timestamp column
  TSKEY *tsList = (TSKEY *) pTsCol->pData;

  pTableInfo->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4256

4257 4258 4259 4260 4261
/*
 * Apply the offset of the query by skipping data blocks.
 *
 * Nothing is done when the offset is not positive or when column filters exist. Otherwise data
 * blocks are iterated: a block holding fewer rows than the remaining offset is skipped completely
 * (the offset is reduced by its row count and lastKey of the current table is moved one step past
 * the block); the first block that reaches the offset is handed to updateOffsetVal() and the
 * iteration stops.
 *
 * The function does not return normally in two cases, both leaving through
 * longjmp(pRuntimeEnv->env, ...): when the query is killed (TSDB_CODE_TSC_QUERY_CANCELLED, after
 * finalizeQueryResult()) and when terrno is set after the iteration.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pTableInfo = pQuery->current;
  TsdbQueryHandleT pHandle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blk = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pHandle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blk);

    // the offset ends in this block: find the start position inside of it and stop
    if (pQuery->limit.offset <= blk.rows) {
      updateOffsetVal(pRuntimeEnv, &blk);
      break;
    }

    // the whole block is covered by the offset
    pQuery->limit.offset -= blk.rows;

    TSKEY edge = (QUERY_IS_ASC_QUERY(pQuery)) ? blk.window.ekey : blk.window.skey;
    pTableInfo->lastKey = edge + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blk.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4296

H
Haojun Liao 已提交
4297
/*
 * Apply the offset of an interval query by skipping time windows instead of rows.
 *
 * *start is initialized with lastKey of the current table. Nothing else is done when the offset is
 * not positive, or when column filters, a timestamp buffer or fill info are present.
 *
 * Otherwise the data blocks are iterated and, per block, the time windows are walked. The offset is
 * reduced by one whenever the current window passes the end check against the block (see the loop
 * below). Since holes may exist in the data, the start of the next window is looked up in the
 * timestamps of the block (column 0) through getNextQualifiedWindow() whenever the next window
 * still touches the current block.
 *
 * Once the offset drops to 0:
 *   - if the next window touches the current block, the query start (window of the current table,
 *     pQuery->window.skey, *start) is moved to the first timestamp of that window and the query
 *     functions are applied on the block right away; curIndex of the window result info is
 *     restored afterwards;
 *   - otherwise the query start is moved to the start key of the next window.
 *
 * If terrno is set after the iteration, the function leaves through longjmp(pRuntimeEnv->env, terrno).
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        [out] timestamp the query continues from
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  // no time window may have been opened so far
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableInfo = pQuery->current;

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blk = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blk);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      // descending order: aligned on the end key of every block
      getAlignQueryTimeWindow(pQuery, blk.window.ekey, pQuery->window.ekey, blk.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending order: aligned once, on the start key of the first block
      getAlignQueryTimeWindow(pQuery, blk.window.skey, blk.window.skey, pQuery->window.ekey, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window to be checked against this block
    STimeWindow cur = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // end check of the current window against the block: it counts as one skipped window
      bool curPassed = QUERY_IS_ASC_QUERY(pQuery) ? (cur.ekey <= blk.window.ekey) : (cur.ekey >= blk.window.skey);
      if (curPassed) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = cur.skey;
      }

      STimeWindow next = cur;
      GET_NEXT_TIMEWINDOW(pQuery, &next);

      bool offsetDone = (pQuery->limit.offset == 0);
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (next.skey <= blk.window.ekey) : (next.ekey >= blk.window.skey);

      if (!nextInBlock) {
        if (!offsetDone) {
          break;  // offset is not 0, and next time window begins or ends in the next block.
        }

        // offset consumed and the next window is located behind this block
        *start = next.skey;
        pQuery->window.skey = next.skey;
        pWinInfo->prevSKey = next.skey;
        return true;
      }

      /*
       * The next time window still touches the current data block: load the block and find the
       * position where the next queried time window starts. Only the primary timestamp column
       * (column 0) is read here.
       * TODO optimize performance: not all data blocks need to be loaded, only the one in which
       * the first actually required time window resides.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      next = cur;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &blk, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (!offsetDone) {
        // more windows have to be skipped: continue from the window found in this block
        pTableInfo->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
        pWinInfo->prevSKey = next.skey;
        cur = next;
        continue;
      }

      // offset consumed: reset the query start timestamp to the first row of the found window
      pTableInfo->win.skey = ((TSKEY *)pTsCol->pData)[startPos];
      pQuery->window.skey = pTableInfo->win.skey;
      *start = pTableInfo->win.skey;

      pWinInfo->prevSKey = next.skey;
      int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blk, NULL, binarySearchForKey, pDataBlock);
      pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blk.window.skey, blk.window.ekey, blk.rows, numOfRes, pQuery->current->lastKey);

      return true;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4423 4424
// Forward declaration, so that the function can be referenced before its definition.
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4425
/*
 * Create the TSDB query handle of the runtime environment.
 *
 * No handle is created (and TSDB_CODE_SUCCESS is returned) for queries that only read tags, and for
 * super table queries that are neither interval queries nor fixed output queries.
 *
 * The query condition is built from the order, the column list and the time window of the query.
 * For an ascending, non-interval, non-group-by-normal-column, non-fixed-output query on exactly one
 * table, the time window of that table is used instead of the window of the query.
 *
 * Depending on the kind of query the handle is created by tsdbQueryLastRow(),
 * tsdbQueryRowsInExternalWindow() or tsdbQueryTables(). For the first one, the time window of the
 * query is replaced by the window of the condition afterwards and propagated to every table (win
 * and lastKey); if the table group info holds no tables then, the number of tables in
 * tableqinfoGroupInfo is set to 0 as well.
 *
 * @param tsdb           TSDB instance, passed through to the tsdbQuery* functions
 * @param pQInfo         query info
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS if no handle is needed, otherwise the value of terrno after the handle
 *         creation (terrno is reset to TSDB_CODE_SUCCESS right before)
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  // these queries do not scan any data block through this handle
  if (onlyQueryTags(pQuery) ||
      (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv)))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // plain ascending scan of one single table: use the time window of that table
  bool useTableWindow = !isSTableQuery
                     && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
                     && (cond.order == TSDB_ORDER_ASC)
                     && (!QUERY_IS_INTERVAL_QUERY(pQuery))
                     && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
                     && (!isFixedOutputQuery(pRuntimeEnv));
  if (useTableWindow) {
    SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pFirstTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pFirstTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    } else {
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    }

    return terrno;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the updated window to all tables of all groups
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, t);

      pTableInfo->win = pQuery->window;
      pTableInfo->lastKey = pTableInfo->win.skey;
    }
  }

  return terrno;
}

4489 4490 4491
/*
 * Build the fill column descriptions for the output columns of a query.
 *
 * One SFillColInfo is created per output expression. Bytes, type and function id are taken from
 * the expression, the fill value from pQuery->fillVal, and the offset of a column is the sum of
 * the bytes of all preceding columns. Every column is flagged as TSDB_COL_NORMAL.
 *
 * @param pQuery  query whose select expressions are described
 * @return array of pQuery->numOfOutput entries allocated with calloc() (to be released by the
 *         caller or whoever takes it over), or NULL if the allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pCols = calloc(numOfCols, sizeof(SFillColInfo));
  if (pCols == NULL) {
    return NULL;
  }

  int32_t rowOffset = 0;  // byte offset of the current column inside a row

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo*    pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo* pCol = &pCols[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    rowOffset += pExpr->bytes;
  }

  return pCols;
}

4514
/*
 * Initialize the runtime environment of a query before its execution.
 *
 * Steps, in this order:
 *   1. derive the topBotQuery / hasTagResults flags and the scan limitation of the query;
 *   2. create the TSDB query handle (setupQueryHandle());
 *   3. store tsdb, vgId, the timestamp buffer and further flags in pQInfo / the runtime environment
 *      and set the traverse order of the timestamp buffer, if there is one;
 *   4. set up the function contexts (setupQueryRuntimeEnv());
 *   5. create the disk based result buffer and the window result info:
 *        - for super table queries that do not only read tags (the window result info is created
 *          only if it is not an interval query),
 *        - for table queries with group-by on a normal column or with interval;
 *   6. create the fill info for fill queries (except point interpolation queries);
 *   7. set the query status to QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query info to initialize
 * @param pTsBuf         timestamp buffer, may be NULL
 * @param tsdb           TSDB instance
 * @param vgId           vnode group id, stored in pQInfo
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS, or the error code of the first failing step
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the buffer forwards if it is stored in the order of the query, backwards otherwise
    int16_t order = (pQuery->order.order == pTsBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pTsBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t pageSize = DEFAULT_PAGE_SIZE;
  int32_t rowSize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowSize);

  int32_t bufSizeLimit = 1024*1024*2;  // 2 MB, passed to createDiskbasedResultBuffer()

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowSize, pageSize, bufSizeLimit, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t keyType = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        keyType = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, keyType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowSize);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowSize, pageSize, bufSizeLimit, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // the window results are keyed by the group-by column, or by the timestamp for interval queries
    int16_t keyType = TSDB_DATA_TYPE_TIMESTAMP;
    if (pRuntimeEnv->groupbyNormalCol) {
      keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, keyType);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // NOTE(review): taosCreateFillColInfo() returns NULL if its allocation fails; the result is
    // passed on unchecked here, and the result of taosInitFillInfo() is not checked either.
    SFillColInfo *pColInfo = taosCreateFillColInfo(pQuery);

    // the filling starts at the aligned start key of the first time window
    STimeWindow aligned = TSWINDOW_INITIALIZER;

    TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &aligned);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, aligned.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4620
/*
 * Clear the `complete` flag on the result info of every output column, so the
 * output functions are allowed to run again for the next table.
 * Columns whose result info is NULL are left untouched.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647
/*
 * Prepare the per-block execution environment for one table.
 *
 * - interval query: set the interval query range from the block start key and,
 *   when tag results or a ts buffer are present, set the additional info of the table.
 * - otherwise: set the execution context for the table's group, using the key
 *   one step (in scan direction) past the end key of the block.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery* pQuery = pEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    TSKEY blockStart = pBlockInfo->window.skey;
    setIntervalQueryRange(pQInfo, blockStart);

    if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  // not an interval query
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + direction);
}

H
Haojun Liao 已提交
4648
/*
 * Iterate over all data blocks delivered by the active query handle (the
 * primary handle during the master scan, the secondary handle otherwise) and
 * apply the query functions to each block.
 *
 * The loop stops early when the table of a block is not found in the table
 * hash map or when loading the block fails. The function leaves through
 * longjmp(pRuntimeEnv->env, ...) when the query has been killed, or when
 * terrno is set after the loop.
 *
 * @param pQInfo  query instance
 * @return        difference between the taosGetTimestampMs() values sampled at
 *                exit and at entry
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;
  SQueryCostInfo   *pCost = &pEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT handle = pEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pEnv)) {
    handle = pEnv->pQueryHandle;
  }

  SDataBlockInfo block = SDATA_BLOCK_INITIALIZER;
  int32_t        direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &block);

    // look up the per-table query info by the table id of the block
    STableQueryInfo **ppTable = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &block.tid, sizeof(block.tid));
    if (ppTable == NULL) {
      break;
    }

    STableQueryInfo *pTable = *ppTable;
    pQuery->current = pTable;

    // the table window must be ordered like the scan and lie inside the query window
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert(pTable->win.skey <= pTable->win.ekey);
      assert(pTable->lastKey >= pTable->win.skey);
      assert(pTable->win.skey >= pQuery->window.skey && pTable->win.ekey <= pQuery->window.ekey);
    } else {
      assert(pTable->win.skey >= pTable->win.ekey);
      assert(pTable->lastKey <= pTable->win.skey);
      assert(pTable->win.skey <= pQuery->window.skey && pTable->win.ekey >= pQuery->window.ekey);
    }

    if (!pRuntimeEnvGroupbyGuard(pEnv)) {
      setEnvForEachBlock(pQInfo, pTable, &block);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pBlockStatis = NULL;
    SArray      *pBlockData = NULL;

    int32_t code = loadDataBlockOnDemand(pEnv, &pQuery->current->windowResInfo, handle, &block, &pBlockStatis, &pBlockData, &blockStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // block is not needed: only move lastKey one step past the block
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery)? block.window.ekey : block.window.skey;
      pQuery->current->lastKey = edge + direction;
      continue;
    }

    pCost->totalRows += block.rows;
    stableApplyFunctionsOnBlock(pEnv, &block, pBlockStatis, pBlockData, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, block.uid, block.tid, block.window.skey, block.window.ekey, block.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pEnv->env, terrno);
  }

  updateWindowResNumOfRes(pEnv);

  return taosGetTimestampMs() - startTs;
}

4723 4724
/*
 * Prepare the runtime environment for querying a single table of the first
 * table group: rebuild the primary query handle so that it covers only this
 * table, position the ts buffer (when one exists) and initialize the output
 * buffers of the function contexts.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) if the query handle cannot
 * be created.
 *
 * @param pQInfo  query instance
 * @param index   position of the table inside table group 0
 * @return        false if the tag of the table cannot be located in the ts
 *                buffer, true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *firstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(firstGroup, index);

  if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
    setTagVal(pEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId* tableId = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         tableId->uid, tableId->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  // the scan resumes from the last key that was handled for this table
  STsdbQueryCond cond = {
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
      .order     = pQuery->order.order,
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
  };

  // todo refactor
  // build a temporary group list that holds one group with this single table
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableList = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo keyInfo = {.pTable = pTableInfo->pTable, .lastKey = pTableInfo->lastKey};
  taosArrayPush(tableList, &keyInfo);
  taosArrayPush(groupList, &tableList);

  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = groupList};

  // release the handle of the previous table before creating the new one
  if (pEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pQueryHandle);
    pEnv->pQueryHandle = NULL;
  }

  pEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable, pQInfo);

  taosArrayDestroy(tableList);
  taosArrayDestroy(groupList);

  if (pEnv->pQueryHandle == NULL) {
    longjmp(pEnv->env, terrno);
  }

  if (pEnv->pTSBuf != NULL) {
    if (pEnv->cur.vgroupIndex == -1) {
      STSElem startElem = tsBufGetElemStartPos(pEnv->pTSBuf, pQInfo->vgId, &pEnv->pCtx[0].tag);

      // failed to find data with the specified tag value and vnodeId
      if (startElem.vnode < 0) {
        qDebug("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pEnv->pCtx[0].tag.pz);
        return false;
      }

      STSCursor pos = tsBufGetCursor(pEnv->pTSBuf);
      qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pEnv->pCtx[0].tag.pz,
          pos.blockIndex, pos.tsIndex);
    } else {
      STSElem curElem = tsBufGetElem(pEnv->pTSBuf);

      // the current element belongs to another tag: the tag of this table must exist in the buffer
      if (tVariantCompare(&curElem.tag, &pEnv->pCtx[0].tag) != 0) {
        STSElem startElem = tsBufGetElemStartPos(pEnv->pTSBuf, pQInfo->vgId, &pEnv->pCtx[0].tag);
        if (startElem.vnode < 0) {
          return false;
        }
      }

      tsBufSetCursor(pEnv->pTSBuf, &pEnv->cur);

      STSCursor pos = tsBufGetCursor(pEnv->pTSBuf);
      qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pEnv->pCtx[0].tag.pz,
             pos.blockIndex, pos.tsIndex);
    }
  }

  initCtxOutputBuf(pEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4808
static void sequentialTableProcess(SQInfo *pQInfo) {
4809
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4810
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4811
  setQueryStatus(pQuery, QUERY_COMPLETED);
4812

H
Haojun Liao 已提交
4813
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4814

H
Haojun Liao 已提交
4815
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4816 4817
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4818

4819
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4820
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4821

S
TD-1057  
Shengliang Guan 已提交
4822
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4823
             numOfGroups, group);
H
Haojun Liao 已提交
4824 4825 4826 4827 4828 4829 4830

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4831 4832
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4833 4834 4835
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4836

H
Haojun Liao 已提交
4837 4838 4839 4840 4841 4842 4843
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4844

4845
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4846
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4847
      } else {
H
Haojun Liao 已提交
4848
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4849
      }
B
Bomin Zhang 已提交
4850 4851 4852 4853 4854 4855

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4856

H
Haojun Liao 已提交
4857
      initCtxOutputBuf(pRuntimeEnv);
4858

4859
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4860
      assert(taosArrayGetSize(s) >= 1);
4861

4862
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4863 4864 4865
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4866

dengyihao's avatar
dengyihao 已提交
4867
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4868

H
Haojun Liao 已提交
4869
      // here we simply set the first table as current table
4870 4871 4872
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4873
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4874

H
Haojun Liao 已提交
4875 4876 4877 4878 4879
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4880

H
Haojun Liao 已提交
4881 4882 4883 4884 4885
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4886 4887 4888 4889 4890 4891

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4892
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4893
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4894
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4895

S
TD-1057  
Shengliang Guan 已提交
4896
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4897 4898 4899 4900 4901 4902 4903

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4904 4905
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4918
      // no need to update the lastkey for each table
4919
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4920

B
Bomin Zhang 已提交
4921 4922
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4923 4924 4925
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4926

4927
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4928 4929
      assert(taosArrayGetSize(s) >= 1);

4930
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4931 4932 4933 4934 4935 4936 4937 4938

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
4939
      taosArrayDestroy(s);
4940 4941 4942 4943 4944
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
4945
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
4946 4947 4948

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
4949
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes));
4950 4951 4952
        }
      }

4953
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4954 4955 4956 4957 4958 4959 4960
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4961
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4962 4963 4964 4965 4966 4967

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4968 4969 4970
    }
  } else {
    /*
4971
     * 1. super table projection query, 2. ts-comp query
4972 4973 4974
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4975
    if (pQInfo->groupIndex > 0) {
4976
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4977
      pQuery->rec.total += pQuery->rec.rows;
4978

4979
      if (pQuery->rec.rows > 0) {
4980 4981 4982
        return;
      }
    }
4983

4984
    // all data have returned already
4985
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4986 4987
      return;
    }
4988

4989 4990
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4991

H
Haojun Liao 已提交
4992
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4993 4994
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4995

4996
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4997
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4998
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4999
      }
5000

5001
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5002
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5003
        pQInfo->tableIndex++;
5004 5005
        continue;
      }
5006

H
hjxilinx 已提交
5007
      // TODO handle the limit offset problem
5008
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5009 5010
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5011 5012 5013
          continue;
        }
      }
5014

5015
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5016
      skipResults(pRuntimeEnv);
5017

5018
      // the limitation of output result is reached, set the query completed
5019
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5020
        SET_STABLE_QUERY_OVER(pQInfo);
5021 5022
        break;
      }
5023

5024 5025
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5026

5027
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5028 5029 5030 5031 5032 5033
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5034
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5035

H
Haojun Liao 已提交
5036
        STableIdInfo tidInfo = {0};
5037

H
Haojun Liao 已提交
5038 5039 5040
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5041
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5042 5043
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5044
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5045
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5046 5047
          break;
        }
5048

5049
      } else {
5050
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5051 5052
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5053 5054
          continue;
        } else {
5055 5056 5057
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5058 5059 5060
        }
      }
    }
H
Haojun Liao 已提交
5061

5062
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5063 5064
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5065
  }
5066

5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5079
    finalizeQueryResult(pRuntimeEnv);
5080
  }
5081

5082 5083 5084
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5085

5086
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5087 5088
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5089
      pQuery->limit.offset);
5090 5091
}

5092 5093 5094 5095
/*
 * Switch the runtime environment to the reverse scan: set the reverse scan flag,
 * swap the start/end key of the query window, flip the query order (and copy it
 * into the ts buffer cursor, if a ts buffer exists), switch the order of the
 * function contexts and create the secondary query handle on all table groups.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) if the secondary query handle
 * cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // the condition is built from the already swapped window and flipped order
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer so that no stale handle stays in the
  // runtime environment until the new one is assigned below
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5129 5130 5131 5132
/*
 * Switch the runtime environment back to the master scan: swap the start/end
 * key of the query window, flip the order of the ts buffer cursor (if a ts
 * buffer exists), switch the order of the function contexts and set the master
 * scan flag.
 *
 * NOTE(review): doSaveContext also flips pQuery->order.order; it is not flipped
 * back here - confirm that this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5143 5144 5145
/*
 * Close all time windows once a scan is finished.
 * For an interval query the window results of every table in every table group
 * are closed; otherwise the window results of the runtime environment (the
 * group results) are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(tables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo* pTable = taosArrayGetP(tables, t);
      closeAllTimeWindow(&pTable->windowResInfo);
    }
  }
}

/*
 * Query handler that scans all tables through one query handle.
 *
 * If groupIndex > 0 the results already exist and are only copied to the output.
 * Otherwise: master scan, close all time windows, optional reverse scan, then
 * merge/copy the results into the output buffer.
 *
 * Leaves through longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED) when
 * pQInfo->code reports an error or the query is killed after a scan.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * if the groupIndex > 0, the query process must be completed yet, we only need to
     * copy the data into output buffer
     */
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pEnv); // clean up allocated resource during query
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    // the context is switched for the reverse scan and switched back afterwards
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // same abort check as after the master scan
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pEnv); // clean up allocated resource during query
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery))) {
    // not a interval query
    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5242
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5243
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5244

H
hjxilinx 已提交
5245
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5246
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5247 5248
    return;
  }
5249

H
hjxilinx 已提交
5250
  pQuery->current = pTableInfo;  // set current query table info
5251

5252
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5253
  finalizeQueryResult(pRuntimeEnv);
5254

H
Haojun Liao 已提交
5255
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5256 5257
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5258
  }
5259

H
Haojun Liao 已提交
5260
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5261
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5262

5263
  skipResults(pRuntimeEnv);
5264
  limitResults(pRuntimeEnv);
5265 5266
}

H
hjxilinx 已提交
5267
/*
 * Query handler for a single table that may produce several rows of output.
 *
 * The table is scanned repeatedly until at least one row remains after the
 * offset has been applied or the query is completed. When the query is
 * completed, the uid/tid and last key of the table are appended to
 * pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->rec.rows == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->rec.rows > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // zero-initialize so that no indeterminate bytes (padding or members not set
    // below) are copied into the array
    STableIdInfo tidInfo = {0};
    STableId* id = TSDB_TABLEID(pQuery->current->pTable);

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5327
/*
 * Scan the data blocks of the current table for an interval query until the
 * query status is QUERY_COMPLETED or QUERY_RESBUF_FULL. After every scan the
 * results are finalized and, under the conditions below, closed time windows
 * are dropped to consume the query offset.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        key passed to scanOneTableDataBlocks for every scan
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool hasFilterOrTsBuf = (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    if (hasFilterOrTsBuf && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      // maxOutput <= 0, means current query does not generate any results
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // drop as many closed windows as the remaining offset allows
      int32_t dropped = (int32_t)(MIN(closedWindows, pQuery->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, dropped);
      pQuery->limit.offset -= dropped;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5354
// handle time interval query on table
H
hjxilinx 已提交
5355
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5356 5357
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5358 5359
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5360

H
Haojun Liao 已提交
5361
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5362
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5363

5364
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5365
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5366
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5367 5368 5369 5370
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5371
  while (1) {
H
Haojun Liao 已提交
5372
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5373

H
Haojun Liao 已提交
5374
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5375
      pQInfo->groupIndex = 0;  // always start from 0
5376
      pQuery->rec.rows = 0;
5377
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5378

5379
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5380
    }
5381

5382
    // the offset is handled at prepare stage if no interpolation involved
5383
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5384
      limitResults(pRuntimeEnv);
5385 5386
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5387
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5388
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5389
      numOfFilled = 0;
5390

H
Haojun Liao 已提交
5391
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5392
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5393
        limitResults(pRuntimeEnv);
5394 5395
        break;
      }
5396

5397
      // no result generated yet, continue retrieve data
5398
      pQuery->rec.rows = 0;
5399 5400
    }
  }
5401

5402
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5403
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5404
    pQInfo->groupIndex = 0;
5405
    pQuery->rec.rows = 0;
5406
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5407
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5408 5409 5410
  }
}

5411 5412 5413 5414
/*
 * Produce the next batch of results for a query on exactly one table.
 *
 * If queryHasRemainResForTableQuery() reports pending results they are
 * returned first (through the fill routine when a fill type is set, otherwise
 * from the window results). Only when nothing is pending is one of the
 * table*Process routines run; the time spent there is added to
 * summary.elapsedTime.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResForTableQuery(pRuntimeEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * There are remain results that are not returned due to result interpolation
       * So, we do keep in this procedure instead of launching retrieve procedure for next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pRuntimeEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    // no fill: try to hand out rows that are still held in the window results
    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pRuntimeEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // there are not data remains
        if (pRuntimeEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableQueryInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTableQueryInfo);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, pTableQueryInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQueryInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5476
/*
 * Produce the next batch of results for a super table query.
 *
 * Interval queries, and fixed-output queries that are neither point
 * interpolation nor group-by-normal-column, go through
 * multiTableQueryProcess(); everything else goes through
 * sequentialTableProcess(). The time spent is added to summary.elapsedTime.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) && (!pRuntimeEnv->groupbyNormalCol));

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5497
/*
 * Locate the column referenced by pExprMsg.
 *
 * @return for a tag column: TSDB_TBNAME_COLUMN_INDEX when the column id is the
 *         table-name pseudo column, otherwise its position in pTagCols;
 *         for a user-defined column: TSDB_UD_COLUMN_INDEX;
 *         for any other column: its position in pQueryMsg->colList.
 *         When no entry matches, assert(0) fires and -1 is returned.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  SColIndex *pColIndex = &pExprMsg->colInfo;

  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
    if (pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t idx = 0; idx < pQueryMsg->numOfTags; ++idx) {
      if (pColIndex->colId == pTagCols[idx].colId) {
        return idx;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pColIndex->flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t idx = 0; idx < pQueryMsg->numOfCols; ++idx) {
      if (pColIndex->colId == pQueryMsg->colList[idx].colId) {
        return idx;
      }
    }
  }

  // the referenced column id does not exist in the source lists
  assert(0);
  return -1;
}

5528 5529 5530
/*
 * Check that the index getColumnIndexInSource() yields for pExprMsg is below
 * numOfCols or below numOfTags.
 * NOTE(review): negative pseudo indexes also satisfy this test - confirm that
 * is intended before relying on it.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5533
/*
 * Sanity-check the scalar header fields of a query message.
 * The first violated rule is logged with qError and false is returned:
 *   - interval.interval must not be negative
 *   - numOfTables must be positive
 *   - numOfGroupCols must not be negative
 *   - numOfOutput must be in [1, TSDB_MAX_COLUMNS]
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the number of source columns/tags announced by the query message.
 *
 * Both counts must be non-negative and their sum must not exceed
 * TSDB_MAX_COLUMNS. When the message carries no source column at all, every
 * output expression must be one of: TSDB_FUNC_TAGPRJ, or TSDB_FUNC_TID_TAG /
 * TSDB_FUNC_COUNT applied to the table-name pseudo column.
 *
 * @param pQueryMsg query message whose counts were already byte-order converted
 * @param pExprMsg  array of pQueryMsg->numOfOutput expression messages
 * @return true when the message is acceptable, false otherwise
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  // The counts come from the message: range-check each one before adding them so that
  // an oversized value cannot overflow the signed sum.
  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 ||
      pQueryMsg->numOfCols > TSDB_MAX_COLUMNS || pQueryMsg->numOfTags > TSDB_MAX_COLUMNS ||
      pQueryMsg->numOfCols + pQueryMsg->numOfTags > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;
  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only expressions that need none are allowed
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
      continue;
    }

    return false;
  }

  return true;
}

5579
/*
 * Read pQueryMsg->numOfTables STableIdInfo records starting at pMsg, convert
 * their tid/uid/key fields in place with htonl/htobe64, and append each record
 * to a newly created array stored in *pTableIdList.
 *
 * @return pointer to the first byte after the consumed records
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  // walk the message with a typed cursor instead of advancing a byte pointer
  STableIdInfo *pIdInfo = (STableIdInfo *)pMsg;
  for (int32_t j = 0; j < pQueryMsg->numOfTables; ++j, ++pIdInfo) {
    pIdInfo->tid = htonl(pIdInfo->tid);
    pIdInfo->uid = htobe64(pIdInfo->uid);
    pIdInfo->key = htobe64(pIdInfo->key);

    taosArrayPush(*pTableIdList, pIdInfo);
  }

  return (char *)pIdInfo;
}
5597

5598
/**
H
hjxilinx 已提交
5599
 * pQueryMsg->head has been converted before this function is called.
5600
 *
H
hjxilinx 已提交
5601
 * @param pQueryMsg
5602 5603 5604 5605
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5606
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5607
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5608 5609
  int32_t code = TSDB_CODE_SUCCESS;

5610 5611 5612 5613
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5614 5615 5616 5617 5618 5619
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5620 5621
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5622

5623 5624
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5625
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5626
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5627 5628

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5629
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5630
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5631 5632 5633
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5634
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5635
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5636
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5637

5638
  // query msg safety check
5639
  if (!validateQueryMsg(pQueryMsg)) {
5640 5641
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5642 5643
  }

H
hjxilinx 已提交
5644 5645
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5646 5647
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5648
    pColInfo->colId = htons(pColInfo->colId);
5649
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5650 5651
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5652

H
hjxilinx 已提交
5653
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5654

H
hjxilinx 已提交
5655
    int32_t numOfFilters = pColInfo->numOfFilters;
5656
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5657
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5658 5659 5660 5661
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5662 5663 5664
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5665
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5666

5667 5668
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5669 5670 5671

      pMsg += sizeof(SColumnFilterInfo);

5672 5673
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5674

5675
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5676 5677 5678 5679 5680
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5681
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5682
        pMsg += (pColFilter->len + 1);
5683
      } else {
5684 5685
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5686 5687
      }

5688 5689
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5690 5691 5692
    }
  }

5693
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5694 5695 5696 5697 5698
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5699
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5700

5701
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5702
    (*pExpr)[i] = pExprMsg;
5703

5704
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5705 5706 5707 5708
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5709

5710
    pMsg += sizeof(SSqlFuncMsg);
5711 5712

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5713
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5714 5715 5716 5717
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5718
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5719 5720 5721 5722 5723
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5724 5725
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5726
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5727 5728
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5729 5730
      }
    } else {
5731
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5732
//        return TSDB_CODE_QRY_INVALID_MSG;
5733
//      }
5734 5735
    }

5736
    pExprMsg = (SSqlFuncMsg *)pMsg;
5737
  }
5738

5739
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5740
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5741
    goto _cleanup;
5742
  }
5743

H
hjxilinx 已提交
5744
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5745

H
hjxilinx 已提交
5746
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5747
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5748 5749 5750 5751
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5752 5753 5754

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5755
      pMsg += sizeof((*groupbyCols)[i].colId);
5756 5757

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5758 5759
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5760
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5761 5762 5763 5764 5765
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5766

H
hjxilinx 已提交
5767 5768
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5769 5770
  }

5771 5772
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5773
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5774 5775

    int64_t *v = (int64_t *)pMsg;
5776
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5777 5778
      v[i] = htobe64(v[i]);
    }
5779

5780
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5781
  }
5782

5783 5784
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5785 5786 5787 5788 5789
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5790 5791
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5792

5793 5794 5795 5796
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5797

5798
      (*tagCols)[i] = *pTagCol;
5799
      pMsg += sizeof(SColumnInfo);
5800
    }
H
hjxilinx 已提交
5801
  }
5802

5803 5804 5805
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5806 5807 5808 5809 5810 5811

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5812 5813 5814
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5815

weixin_48148422's avatar
weixin_48148422 已提交
5816
  if (*pMsg != 0) {
5817
    size_t len = strlen(pMsg) + 1;
5818

5819
    *tbnameCond = malloc(len);
5820 5821 5822 5823 5824
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5825
    strcpy(*tbnameCond, pMsg);
5826
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5827
  }
5828

5829
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5830 5831
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5832
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5833
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5834 5835

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5836 5837

_cleanup:
S
Shengliang Guan 已提交
5838
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5839 5840
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5841 5842 5843 5844
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5845 5846

  return code;
5847 5848
}

H
hjxilinx 已提交
5849
/*
 * Build the expression tree of an arithmetic output column from the binary
 * form carried in the first argument of pArithExprInfo->base and store it in
 * pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS on success, the code delivered to the CATCH block
 *         when exprTreeFromBinary() raises, or TSDB_CODE_QRY_APP_ERROR when it
 *         yields NULL.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5870
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5871 5872
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5873
  int32_t code = TSDB_CODE_SUCCESS;
5874

H
Haojun Liao 已提交
5875
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5876
  if (pExprs == NULL) {
5877
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5878 5879 5880 5881 5882
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5883
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5884
    pExprs[i].base = *pExprMsg[i];
5885
    pExprs[i].bytes = 0;
5886 5887 5888 5889

    int16_t type = 0;
    int16_t bytes = 0;

5890
    // parse the arithmetic expression
5891
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5892
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5893

5894
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5895
        taosTFree(pExprs);
5896
        return code;
5897 5898
      }

5899
      type  = TSDB_DATA_TYPE_DOUBLE;
5900
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5901
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5902
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5903
      type = s.type;
H
Haojun Liao 已提交
5904
      bytes = s.bytes;
5905 5906
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5907 5908
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5909 5910
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5911 5912 5913 5914 5915

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5916
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5917
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5918

dengyihao's avatar
dengyihao 已提交
5919
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5920 5921 5922 5923
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5924
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5925

H
Haojun Liao 已提交
5926 5927 5928
        type  = s.type;
        bytes = s.bytes;
      }
5929 5930
    }

S
TD-1057  
Shengliang Guan 已提交
5931
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
5932
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5933
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5934
      taosTFree(pExprs);
5935
      return TSDB_CODE_QRY_INVALID_MSG;
5936 5937
    }

5938
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5939
      tagLen += pExprs[i].bytes;
5940
    }
5941
    assert(isValidDataType(pExprs[i].type));
5942 5943 5944
  }

  // TODO refactor
5945
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5946 5947
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5948

5949
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5950
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5951 5952 5953 5954 5955 5956 5957 5958 5959
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
5960 5961 5962
    }
  }

5963
  *pExprInfo = pExprs;
5964 5965 5966
  return TSDB_CODE_SUCCESS;
}

5967
/*
 * Create the group-by descriptor for a query.
 *
 * @param pQueryMsg converted query message
 * @param pColIndex array of pQueryMsg->numOfGroupCols group-by columns
 * @param code      out: set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *                  left untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by column or
 *         when an allocation failed (distinguish the two through *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not hand out a descriptor without its column list
    taosTFree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5991
/*
 * Build pQuery->pFilterInfo from the per-column filters in pQuery->colList.
 *
 * Counts the columns that carry at least one filter (adding to
 * pQuery->numOfFilterCols), allocates one SSingleColumnFilterInfo per such
 * column and, for every filter, selects the filter function:
 *   - lower is > or >= and upper is < or <=: a range function from
 *     getRangeFilterFuncArray(type)
 *   - otherwise: the function from getValueFilterFuncArray(type) indexed by the
 *     single valid relational operator
 *
 * @param pQInfo used only as an identifier in log messages
 * @param pQuery query whose colList is read and whose pFilterInfo is filled
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG.
 *         On failure the partially built pQuery->pFilterInfo is left in place.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  // i walks all columns, j only advances for columns that own filters
  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // both bounds present: the slot in the range array depends on which bounds are inclusive
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          // a second operator that does not form a range with the first one is rejected
          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }
      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6084
/*
 * Resolve the column position of every output expression.
 *
 * For each select expression (arithmetic expressions are skipped) the colId referenced by
 * the expression is searched in pQuery->colList when the column is flagged as a normal
 * column, or in pQuery->tagColList otherwise, and the position found is written to
 * colInfo.colIndex. Columns whose colId is <= TSDB_UD_COLUMN_INDEX are left untouched.
 *
 * A normal column that cannot be found trips an assertion; so does a tag column, unless
 * its colId is TSDB_TBNAME_COLUMN_INDEX.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // validate the query object itself before looking at any of its members
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // every normal column referenced by an expression must be part of the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is the only id allowed to be missing from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6121 6122
// forward declaration: the cleanup path of createQInfoImpl() calls freeQInfo() before its definition below
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6123 6124 6125
/*
 * Choose the capacity (in rows) of the output buffer and the row threshold derived from it.
 *
 * Projection queries get enough rows to fill RESULT_MSG_MIN_SIZE bytes, but never fewer than
 * RESULT_MSG_MIN_ROWS; every other query uses a fixed, smaller buffer of 4096 rows.
 * The threshold is 85% of the capacity in both cases.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  // in case of non-prj query, a smaller output buffer will be used.
  int32_t numOfRows = 4096;

  if (isProjQuery(pQuery)) {
    numOfRows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    numOfRows = (numOfRows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : numOfRows;
  }

  pQuery->rec.capacity  = numOfRows;
  pQuery->rec.threshold = (int32_t)(numOfRows * RESULT_THRESHOLD_RATIO);
}

6142 6143
/*
 * Build a query handle (SQInfo together with its SQuery) from a decoded query message.
 *
 * Ownership: pGroupbyExpr, pExprs and pTagCols are stored in the new SQuery. On failure they
 * are released by this function (directly, or through freeQInfo()), so the caller must not
 * free them after the call in either case. The table group in pTableGroupInfo is copied by
 * value into the handle and is destroyed on failure as well.
 *
 * Returns the new handle, or NULL when an allocation or one of the setup steps fails.
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  // from here on the SQuery owns pExprs, pGroupbyExpr and pTagCols; freeQInfo() releases them
  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // size the array by its own element type (colList entries are copied from pQueryMsg->colList)
  pQuery->colList = calloc(numOfCols, sizeof(*pQuery->colList));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the filters are cloned so that the handle does not share them with the message
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    assert(pExprs[col].interBytes >= pExprs[col].bytes);

    // allocate additional memory for interResults that are usually larger then final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);

  // one contiguous buffer; each table's STableQueryInfo is placed at its own slot below
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  int32_t index = 0;

  for(int32_t i = 0; i < (int32_t)numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    // push the group first so that it is released by freeQInfo() if a later step fails
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < (int32_t)s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // each table starts from its own last key
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

  // the labels below fall through on purpose: the earlier the failure, the more has to be released by hand
_cleanup_qinfo:
  // no handle exists yet, so the table group has not been copied into one
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // no SQuery exists yet, so the inputs it would have owned are released directly
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  taosTFree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  taosTFree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6321
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6322 6323 6324 6325
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6326

H
hjxilinx 已提交
6327 6328 6329 6330
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6331
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6332 6333 6334
  return (sig == (uint64_t)pQInfo);
}

6335
/*
 * Finish the setup of a freshly created query handle.
 *
 * The query is marked QUERY_COMPLETED and TSDB_CODE_SUCCESS is returned right away when the
 * time window is empty for the scan order, or when no table is left to query. Otherwise the
 * optional timestamp buffer carried by the message is built and doInitQInfo() is invoked.
 *
 * On failure of doInitQInfo() the handle is released with freeQInfo() before the error code
 * is returned, so the caller must not use pQInfo afterwards.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The timestamp buffer is only consumed by doInitQInfo(); it is built after the early
  // returns above so that those paths do not leave an unreferenced buffer behind.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6379
/*
 * Release an array of column filters. For entries flagged as string filters (filterstr)
 * the buffer referenced by pz is freed first; then the array itself is freed.
 * Nothing is done when pFilter is NULL or numOfFilters is 0.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    for (int32_t idx = 0; idx < numOfFilters; ++idx) {
      SColumnFilterInfo* pItem = &pFilter[idx];
      if (pItem->filterstr) {
        free((void*)(pItem->pz));
      }
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6393 6394
/*
 * Tear down the per-table query info of all groups: every STableQueryInfo is passed to
 * destroyTableQueryInfoImpl(), each group array and the group list are destroyed, the
 * hash map is cleaned up, and the structure is reset to an empty state.
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pTables = taosArrayGetP(pGroups, g);
      size_t  tableCount = taosArrayGetSize(pTables);

      for (int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pTables, t);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pTables);
    }
  }

  taosArrayDestroy(pTableqinfoGroupInfo->pGroupList);
  pTableqinfoGroupInfo->pGroupList = NULL;

  taosHashCleanup(pTableqinfoGroupInfo->map);
  pTableqinfoGroupInfo->map = NULL;

  pTableqinfoGroupInfo->numOfTables = 0;
}

H
hjxilinx 已提交
6417 6418 6419 6420
/*
 * Release a query handle and everything it owns: the runtime environment, the SQuery with
 * its result buffers, fill values, filters, select/group-by expressions, tag and column
 * lists, the per-table query info, the table group and the table id array.
 *
 * Handles that do not pass isValidQInfo() (including NULL) are ignored. The signature is
 * cleared before the memory is returned.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    // output buffers, one page per output column
    if (pQuery->sdata != NULL) {
      for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
        taosTFree(pQuery->sdata[c]);
      }
      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // filter elements attached to each filtered column
    for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
      SSingleColumnFilterInfo *pSingle = &pQuery->pFilterInfo[f];
      if (pSingle->numOfFilters > 0) {
        taosTFree(pSingle->pFilters);
      }
    }

    // select expressions and the expression trees hanging off them
    if (pQuery->pSelectExpr != NULL) {
      for (int32_t e = 0; e < pQuery->numOfOutput; ++e) {
        SExprInfo *pOne = &pQuery->pSelectExpr[e];
        if (pOne->pExpr != NULL) {
          tExprTreeDestroy(&pOne->pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    // column list together with the filters of each column
    if (pQuery->colList != NULL) {
      for (int32_t c = 0; c < pQuery->numOfCols; c++) {
        SColumnInfo *pCol = &pQuery->colList[c];
        freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
      }
      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  taosTFree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle before its memory is released
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6490
/*
 * Compute the payload size, in bytes, of the current result set.
 *
 * For a ts-comp query with at least one row the result lives in the file whose path is
 * stored in sdata[0]->data: the file size is returned and also written back to *numOfRows
 * (0 is returned when the file cannot be inspected). In every other case the size is
 * rowSize * (*numOfRows).
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6511

H
hjxilinx 已提交
6512 6513 6514
/*
 * Copy the current result set into the response buffer `data`.
 *
 * For a ts-comp query the result is the content of the temporary file whose path is stored
 * in sdata[0]->data; the file is read completely into `data`, closed and unlinked. For all
 * other queries doCopyQueryResultToMsg() is used. Afterwards rec.total is advanced by
 * rec.rows, and the query is marked QUERY_OVER once the limit has been reached.
 *
 * The caller must provide a buffer large enough for the whole result.
 * Always returns TSDB_CODE_SUCCESS; I/O problems are logged.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // signed, so that a failing lseek (-1) is not mistaken for a huge file size
      int64_t s = lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, s);
      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t done = 0;

        // read() may legitimately return fewer bytes than requested, keep going until complete
        while (done < s) {
          int64_t  remain = s - done;
          uint32_t chunk = (remain > INT32_MAX) ? (uint32_t)INT32_MAX : (uint32_t)remain;

          int64_t ret = read(fd, data + done, chunk);
          if (ret < 0 && errno == EINTR) {
            continue;  // interrupted before any data was transferred, try again
          }

          if (ret <= 0) {  // error, or unexpected end of file
            break;
          }

          done += ret;
        }

        if (done < s) {  // todo handle error
          qError("QInfo:%p failed to read ts-comp data, path:%s, read:%" PRId64 ", expected:%" PRId64 ", reason:%s",
                 pQInfo, pQuery->sdata[0]->data, done, s, strerror(errno));
          assert(0);
        }
      } else {
        qError("QInfo:%p failed to seek in ts-comp data file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      // FD_VALID() may reject descriptors that were nevertheless opened
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6564 6565 6566 6567 6568 6569 6570
/*
 * Management context for query handles.
 * NOTE(review): presumably one instance exists per vnode (see vgId) - confirm against the
 * code that allocates it.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // NOTE(review): presumably the id of the owning vnode - confirm
  bool            closed;         // NOTE(review): looks like a "shut down" flag - confirm semantics
  pthread_mutex_t lock;           // NOTE(review): presumably guards the fields above - confirm
} SQueryMgmt;

6571
/*
 * Create and initialize a query handle from a query message.
 *
 * The message is decoded, the output and group-by expressions are built, the set of tables
 * to query is resolved (single table, super table by tag condition, or an explicit id
 * list), and the handle is created and initialized.
 *
 * On success *pQInfo holds the new handle. On any failure *pQInfo is set to NULL and every
 * intermediate object built here is released.
 *
 * Returns TSDB_CODE_SUCCESS or the error code of the step that failed.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);
  }

  // createQInfoImpl() takes over pGroupbyExpr, pExprs and pTagColumnInfo whether it succeeds
  // or not, hence the local pointers are cleared to keep _over from releasing them again
  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // pExprs is still owned here only if a step after its creation failed; the expression
  // trees must be destroyed as well, in the same way createQInfoImpl() does on its failure path
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      SExprInfo* pExprInfo = &pExprs[i];
      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6697
/*
 * Destroy a query handle: print the query cost summary and release the handle.
 * Handles that do not pass isValidQInfo() are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

6708 6709 6710 6711 6712 6713 6714 6715
/*
 * Mark the result of the current execution round as ready, give up ownership of the
 * handle, and post the `ready` semaphore.
 *
 * Returns true when a response context is registered on the handle (rspContext != NULL).
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // clear qhandle owner, it must be in the secure area. other thread may run ahead before current, after it is
  // put into task to be executed.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  // posted after the lock has been released
  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6727
/*
 * Run one execution round of the query behind the given handle.
 *
 * The calling thread first claims ownership of the handle; if another thread already owns
 * it, the code TSDB_CODE_QRY_IN_EXEC is recorded and false is returned. Otherwise the query
 * is executed (tag-only, super table or single table variant) unless it was killed or has
 * no table to scan, and the round is finished through doBuildResCheck(), whose result is
 * returned. An error raised through longjmp is stored in pQInfo->code.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t threadId = taosGetPthreadId();

  // only one thread may execute a handle at any time
  int64_t curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId);
  if (curOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) curOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t ret = setjmp(pQInfo->runtimeEnv.env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  // dispatch to the executor matching the kind of query
  if (onlyQueryTags(pQInfo->runtimeEnv.pQuery)) {
    assert(pQInfo->runtimeEnv.pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pQInfo->runtimeEnv.stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  // report how this round ended
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6783
/*
 * Wait until the result of the current execution round is available.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle. For a killed query the
 * handle's code is returned immediately with *buildRes == false. Otherwise the call blocks
 * on the `ready` semaphore, sets *buildRes to true and returns the handle's code.
 *
 * pRspContext is only referenced by the disabled (#if 0) variant below.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  bool validHandle = (pQInfo != NULL) && isValidQInfo(pQInfo);
  if (!validHandle) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  int32_t code = TSDB_CODE_SUCCESS;

#if 0
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);
  assert(pQInfo->rspContext == NULL);

  if (pQInfo->dataReady == QUERY_RESULT_READY) {
    *buildRes = true;
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    *buildRes = false;
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  }

  code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);
#else
  // block until the `ready` semaphore of this handle has been posted
  tsem_wait(&pQInfo->ready);
  *buildRes = true;
  code = pQInfo->code;
#endif

  return code;
}
6826

6827
/**
 * Allocate and fill the retrieve response for the current batch of query results.
 *
 * @param qinfo         opaque query handle, interpreted as SQInfo*
 * @param pRsp          [out] response buffer obtained from rpcMallocCont()
 * @param contLen       [out] total length of the response in bytes
 * @param continueExec  [out] true when the query is neither killed nor over after this batch;
 *                      not written when the handle is invalid or the allocation fails
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or pQInfo->code
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;

  // payload: result data, one extra int32_t, and one STableIdInfo per entry in arrTableIdInfo
  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  // todo: handle allocation failure properly; for now this only avoids a crash and
  // cannot deliver an error code to the client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *rsp = *pRsp;
  bool succeeded = (pQInfo->code == TSDB_CODE_SUCCESS);

  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query
  if (succeeded) {
    rsp->offset = htobe64(pQuery->limit.offset);
  } else {
    rsp->offset = 0;
  }
  rsp->useconds  = htobe64(pEnv->summary.elapsedTime);
  rsp->precision = htons(pQuery->precision);

  if (succeeded && pQuery->rec.rows > 0) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    // nothing to send (or the query failed): mark the query as over
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  bool finished = IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
  *continueExec = !finished;

  if (finished) {
    rsp->completed = 1;  // notify no more result to client
  } else {
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6880

6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891
/**
 * Tell whether a query has finished.
 *
 * @param qinfo  opaque query handle, interpreted as SQInfo*
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise 1 when the query
 *         is killed or its status contains QUERY_OVER, and 0 if neither holds
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQInfo->runtimeEnv.pQuery->status, QUERY_OVER) ? 1 : 0;
}

H
Haojun Liao 已提交
6892
/**
 * Mark a query as killed and wait until it no longer has an owner.
 *
 * @param qinfo  opaque query handle, interpreted as SQInfo*
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, else TSDB_CODE_SUCCESS
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Poll until the executing thread has stopped: once the query is stopped, the owner
  // of the qHandle is cleared immediately.
  const int32_t pollIntervalMs = 100;
  while (pQInfo->owner != 0) {
    taosMsleep(pollIntervalMs);
  }

  return TSDB_CODE_SUCCESS;
}

6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925
/**
 * Copy one tag value into the result buffer, or write the type's NULL marker when the
 * value is missing.
 *
 * @param output  destination in the result buffer
 * @param val     source value; NULL means "no value"
 * @param type    TSDB data type of the value
 * @param bytes   width used for non BINARY/NCHAR types
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarType) {
    // BINARY/NCHAR values carry their own length
    memcpy(output, val, varDataTLen(val));
  } else {
    // todo (from the original code): "here stop will cause client crash"
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
6926 6927 6928
/**
 * Produce the result rows of a tag-only query directly from table meta data.
 *
 * Three kinds of query are handled, selected by the function id of the first output column:
 *   - TSDB_FUNC_TID_TAG: one var-data record per table holding an int16 zero, the table uid,
 *     the table tid, the vgId, and then either the table name or the requested tag value;
 *   - TSDB_FUNC_COUNT:   a single row holding the number of tables ("count(tbname)");
 *   - anything else:     the requested tag values / table name, honouring limit and offset.
 *
 * Progress across calls is kept in pQInfo->tableIndex. On return pQuery->rec.rows holds the
 * number of rows produced and the query status is set to QUERY_COMPLETED. Nothing is done when
 * there is no table group.
 *
 * The fixed header fields of the TID_TAG record are written with memcpy() because their
 * offsets inside the output buffer are not guaranteed to be suitably aligned for direct
 * int64_t/int32_t stores.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;  // size of one output record

    // type/width of the tag itself: taken from the tag column list when the column is found
    // there, otherwise from the expression
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // record header: int16 zero | int64 uid | int32 tid | int32 vgId
      int16_t zero = 0;
      memcpy(output, &zero, sizeof(zero));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // at most one result-buffer worth of rows, further capped by the query limit if smaller
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

7059 7060 7061 7062 7063 7064 7065
/**
 * Return the response context stored in the query handle.
 *
 * @param qinfo  opaque query handle, interpreted as SQInfo*; must not be NULL (asserted)
 * @return the rspContext field of the handle
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  return pInfo->rspContext;
}

7066 7067 7068 7069 7070 7071 7072
/**
 * Release callback handed to taosCacheInit() by qOpenQueryMgmt(): kills the query behind
 * the handle and then destroys it.
 *
 * @param qhandle  pointer to the stored query handle (void**); NULL or a NULL slot is ignored
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle != NULL && *ppHandle != NULL) {
    qKillQuery(*ppHandle);
    qDestroyQueryInfo(*ppHandle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7077
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7078 7079 7080 7081

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7082
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7083 7084 7085 7086
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7087

S
TD-1530  
Shengliang Guan 已提交
7088
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7089 7090 7091 7092
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7093 7094

  qDebug("vgId:%d, open querymgmt success", vgId);
7095
  return pQueryMgmt;
7096 7097
}

H
Haojun Liao 已提交
7098
/**
 * Callback passed to taosCacheRefresh() by qQueryMgmtNotifyClosed(): kills the query
 * behind a cached handle.
 *
 * @param handle  pointer to the stored query handle (void**); NULL is ignored, matching
 *                the guard in freeqinfoFn(). A NULL slot is rejected by qKillQuery() itself.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7104 7105 7106 7107 7108 7109 7110
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7111
//  pthread_mutex_lock(&pQueryMgmt->lock);
7112
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7113
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7114

H
Haojun Liao 已提交
7115
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132
}

/**
 * Destroy a query manager: clean up its handle cache, destroy its mutex and free the object.
 * The manager must already have been marked closed (asserted).
 *
 * @param pQMgmt  query manager created by qOpenQueryMgmt(); NULL is ignored
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  int32_t vgId = pQueryMgmt->vgId;  // saved for the log line after the object is freed

  assert(pQueryMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  taosTFree(pQueryMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

7138
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7139
  if (pMgmt == NULL) {
7140
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7141 7142 7143
    return NULL;
  }

7144
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7145

7146 7147
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7148
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7149
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7150 7151 7152
    return NULL;
  }

H
Haojun Liao 已提交
7153
//  pthread_mutex_lock(&pQueryMgmt->lock);
7154
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7155
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7156
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7157
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7158 7159
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7160 7161
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7162
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7163 7164 7165 7166 7167

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7168
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7169 7170 7171 7172 7173 7174
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7175 7176
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7177 7178 7179 7180 7181 7182 7183
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7184
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7185 7186 7187 7188 7189
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7190
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7191 7192 7193
  return 0;
}

7194