qExecutor.c 239.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
// upper bound of rows in one result-buffer page: 2^12 - 1
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)

/**
 * Check if the primary column is loaded by default; otherwise, the program is
 * forced to load the primary column explicitly.
 * NOTE(review): this comment does not obviously describe the macros that follow - confirm.
 */

// true if any bit of status mask s is set in p
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
// true if the query scans in the forward (ascending) direction
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

// test/set the scan flag kept in the runtime environment
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

// recover the enclosing SQInfo from a pointer to its runtimeEnv member
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

// position of the index-th element counted from (query)->pos in direction step
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
// flip n between TSDB_ORDER_ASC and TSDB_ORDER_DESC in place (n is evaluated more than once)
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the [skey, ekey] range of _src into _dst. Both arguments are struct
 * lvalues (not pointers) and are evaluated twice.
 * No trailing semicolon after while (0): the caller supplies it, so the macro
 * behaves like a single statement even in an unbraced if/else.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
// query execution status flags; each value is a distinct bit so they can be OR-ed together
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* result output buffer is full, current query is paused.
   * this status only exists in group-by clause and diff/add/division/multiply query.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/avg...etc.
   * 2. when all data within the queried time window are processed, it is also denoted as query_completed
   */
  QUERY_COMPLETED = 0x4u,

  /* the result has not been completely returned to the client yet; this status is
   * usually used in case of interval query with interpolation option
   */
  QUERY_OVER = 0x8u,
};
77 78

// outcome codes for a timestamp-join comparison
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Compiled-out allocator wrappers that return NULL at random. When enabled,
 * the #defines below redirect malloc/calloc/realloc in this file to them.
 */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();

  if (v % 1000 <= 0) {
    return NULL;
  } else {
    return malloc(__size);
  }
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  if (v % 1000 <= 0) {
    return NULL;
  } else {
    return calloc(num, __size);
  }
}

static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return realloc(p, __size);
  }
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
// clear the status bits st from (q)->status
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
// number of entries in the table group list of q
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
// the _index-th table group of q, as an SArray*
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/**
 * Advance the time window *tw to the next window in the scan direction.
 *
 * For fixed-length intervals the window start moves by sliding * direction and
 * the end is start + interval - 1. For calendar intervals ('n' month, 'y' year)
 * the window is moved by whole months using the local calendar.
 *
 * @param pQuery  query descriptor; interval, order and precision are read
 * @param tw      in/out, window to advance; the resulting ekey is inclusive
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to seconds
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }

  // a yearly interval is handled as 12 months
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before scaling so the product cannot overflow when time_t/long are 32-bit
  tw->skey = (int64_t)mktime(&tm) * 1000;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }

  // the end key is inclusive
  tw->ekey -= 1;
}

#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))
H
Haojun Liao 已提交
174

175 176
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

H
hjxilinx 已提交
181
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
182
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
183 184
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186 187 188
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
197

198
/**
 * Check whether the row at elemPos passes the filters of every filtered column.
 *
 * A column qualifies as soon as one of its filters accepts the value; the row
 * qualifies only if every column qualifies.
 *
 * @param pQuery   query descriptor holding pFilterInfo / numOfFilterCols
 * @param elemPos  row position inside each filter column's pData buffer
 * @return true if the row passes all column filters
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;

    // null-ness depends only on the element, so evaluate it once per column
    bool isnull = isNull(pElem, pFilterInfo->info.type);

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      if (isnull) {
        // a null value can only be accepted by the is-null filter
        if (pFilterElem->fp == isNull_filter) {
          qualified = true;
          break;
        } else {
          continue;
        }
      } else {
        if (pFilterElem->fp == notNull_filter) {
          qualified = true;
          break;
        } else if (pFilterElem->fp == isNull_filter) {
          continue;
        }
      }

      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/**
 * Return the largest numOfRes among the output functions of the query.
 *
 * @param pRuntimeEnv  runtime environment; pQuery and pCtx are read
 * @return maximum number of results, 0 if no function reports any
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

266 267 268 269 270
/*
 * The number of results needs to be updated when the offset value has been updated.
 * Sets numOfRes of every output function to the given value, except for the
 * ts / tag / tagprj / ts_dummy functions, which are left untouched.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    // the new count must be strictly smaller than the current one
    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

H
Haojun Liao 已提交
286
// map a group index to its result id: 20000000 + groupIndex * 10000
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  int32_t base = 20000000;
  return base + (groupIndex * 10000);
}

/**
 * Check whether the group-by clause contains a normal column.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return true if at least one group-by column is flagged as a normal column
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      //make sure the normal column locates at the second position if tbname exists in group by clause
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/**
 * Return the data type of the first normal column in the group-by clause.
 *
 * @param pQuery        query descriptor; colList is searched by column id
 * @param pGroupbyExpr  group-by expression, must not be NULL
 * @return the column type, or TSDB_DATA_TYPE_NULL if no normal group-by column
 *         exists or its id is not found in pQuery->colList
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // value used while no normal column has been found
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/**
 * Check whether the query combines at least one selectivity function with a
 * tag_dummy / ts_dummy output.
 *
 * @param pQuery  query descriptor; the select expressions are scanned
 * @return true if both kinds of output are present
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

358 359 360 361 362 363 364 365 366 367 368
/**
 * A projection query is one whose outputs are all PRJ or TAGPRJ functions.
 *
 * @param pQuery  query descriptor; the select expressions are scanned
 * @return false as soon as any other function id is found, true otherwise
 */
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    const bool    isProjection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);

    if (!isProjection) {
      return false;
    }

    ++idx;
  }

  return true;
}

369
// a ts_comp query is identified by its first select expression being TSDB_FUNC_TS_COMP
bool isTSCompQuery(SQuery *pQuery) {
  const int32_t firstFuncId = pQuery->pSelectExpr[0].base.functionId;
  return firstFuncId == TSDB_FUNC_TS_COMP;
}
370

371 372 373
/**
 * Truncate the current batch of rows if returning it would exceed the LIMIT.
 *
 * When a limit is active and total + rows exceeds it, rec.rows is cut down to
 * the remaining quota and the query is marked QUERY_COMPLETED.
 *
 * @return true if the batch was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

// true if any output function of the query is TOP or BOTTOM
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
/**
 * Check whether the query output involves tag values.
 *
 * @return true if the only output is a ts_comp function, or if any select
 *         expression refers to a tag column; false otherwise
 */
static bool hasTagValOutput(SQuery* pQuery) {
  const int32_t numOfExprs = pQuery->numOfOutput;

  // a lone ts_comp output always counts
  if (numOfExprs == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // set tag value, by which the results are aggregated.
  int32_t pos = 0;
  while (pos < numOfExprs) {
    SExprInfo *pCurExpr = &pQuery->pSelectExpr[pos];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pCurExpr->base.colInfo.flag)) {
      return true;
    }

    pos += 1;
  }

  return false;
}

421 422 423 424 425 426 427 428
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
429
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
430
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
431 432
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
433 434
  } else {
    *pColStatis = NULL;
435
  }
436

H
Haojun Liao 已提交
437
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
438 439 440
    return false;
  }

441 442 443
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
444

445 446 447 448
  return true;
}

/**
 * Locate the window result keyed by pData, creating a new slot during the
 * master scan when the key is not present yet.
 *
 * The result array is grown (x1.25 above 10000 slots, x1.5 otherwise, and
 * always by at least one slot) when it is full. Failures are reported through
 * longjmp(pRuntimeEnv->env, ...): TSDB_CODE_QRY_OUT_OF_MEMORY on allocation
 * failure, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when the number of windows
 * exceeds MAX_INTERVAL_TIME_WINDOW.
 *
 * @param pRuntimeEnv     runtime environment (summary counters and jmp env)
 * @param pWindowResInfo  window result container; curIndex is updated
 * @param pData           key bytes
 * @param bytes           key length
 * @param masterscan      new windows are only added when true
 * @return the window result, or NULL if the key is unknown and masterscan is false
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // the factors above yield no growth for capacity 0 or 1; always add at least one slot
      if (newCap <= pWindowResInfo->capacity) {
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCap * sizeof(SWindowResult)));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // account for the new slots only after the allocation succeeded
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCap - pWindowResInfo->capacity);

      pWindowResInfo->pResult = (SWindowResult *)t;

      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the time window that covers timestamp ts.
 *
 * Starts from the stored previous start key (first window) or from the current
 * window result, and re-aligns the window when ts falls outside of it. Window
 * bounds are inclusive on both ends.
 *
 * @param pWindowResInfo  window result container (curIndex, prevSKey are read)
 * @param ts              timestamp being processed
 * @param pQuery          query descriptor (interval, precision, window, order)
 * @return the window covering ts; for ascending queries ekey is capped at the
 *         end of the query range
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // ekey is inclusive, same as for every other window computed in this file
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SWindowResult* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  // ts is outside of the candidate window, re-align the window to ts
  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // move the start backwards by whole sliding steps until it is not after ts
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // move the start forwards by whole sliding steps until the window end reaches ts
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

/**
 * Assign a row position in the result buffer to a window result that has none.
 *
 * The row is taken from the last page of group sid; a new page is requested
 * when the group has no page yet or its last page already holds
 * numOfRowsPerPage rows.
 *
 * @param pWindowRes        window result; pos.pageId / pos.rowId are filled in
 * @param pResultBuf        result buffer
 * @param sid               group id used to look up / allocate pages
 * @param numOfRowsPerPage  page capacity in rows
 * @return 0 on success (also when a position was already assigned), -1 if no
 *         page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    // guard against a page that could not be loaded before reading its row count
    if (pData != NULL && pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // take the next free row of the page; pos.pageId is known to be -1 here
  pWindowRes->pos.pageId = pageId;
  pWindowRes->pos.rowId = (int32_t)(pData->num++);

  assert(pWindowRes->pos.pageId >= 0);

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
602
                                       STimeWindow *win, bool masterscan, bool* newWind) {
603 604
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
605

606 607
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
608
  if (pWindowRes == NULL) {
609 610 611
    *newWind = false;

    return masterscan? -1:0;
612
  }
613

614
  *newWind = true;
H
Haojun Liao 已提交
615

616 617 618
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
619
    if (ret != TSDB_CODE_SUCCESS) {
620 621 622
      return -1;
    }
  }
623

624
  // set time window for current result
625
  pWindowRes->win = (*win);
626

H
Haojun Liao 已提交
627
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
628 629 630
  return TSDB_CODE_SUCCESS;
}

631
// return the closed flag of the window result at slot; slot must be within [0, size)
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);
  return pWindowResInfo->pResult[slot].closed;
}

H
Haojun Liao 已提交
636
/**
 * Count the rows, starting at pos and moving in scan direction, that are
 * covered up to and including key ekey.
 *
 * @param numOfRows  number of rows in pData
 * @param searchFn   block search function used to locate ekey
 * @param ekey       end key of the range
 * @param pos        start position inside pData
 * @param order      TSDB_ORDER_ASC, anything else is handled as descending
 * @param pData      timestamp column of the block
 * @return number of rows to step over; asserted to be positive
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    // search the sub-range [pos, numOfRows); end is relative to pos
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      // the row holding exactly ekey belongs to the range as well
      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    // search the sub-range [0, pos]; end is an absolute position
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
667
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
668
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
669
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
670
    return pWindowResInfo->size;
671
  }
672

673
  // no qualified results exist, abort check
674
  int32_t numOfClosed = 0;
675

676
  if (pWindowResInfo->size == 0) {
677
    return pWindowResInfo->size;
678
  }
679

680
  // query completed
H
hjxilinx 已提交
681 682
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
683
    closeAllTimeWindow(pWindowResInfo);
684

685 686 687 688
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
689
    int64_t skey = TSKEY_INITIAL_VAL;
690

691 692
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
693
      if (pResult->closed) {
694
        numOfClosed += 1;
695 696
        continue;
      }
697

698
      TSKEY ekey = pResult->win.ekey;
699
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
700
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
701 702
        closeTimeWindow(pWindowResInfo, i);
      } else {
703
        skey = pResult->win.skey;
704 705 706
        break;
      }
    }
707

708
    // all windows are closed, set the last one to be the skey
709
    if (skey == TSKEY_INITIAL_VAL) {
710 711 712 713 714
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
715

716
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].win.skey;
717

718 719
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
720
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
721
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
722

723
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
724
    } else {
725
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
726
             numOfClosed);
727 728
    }
  }
729

730 731 732 733 734
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
735

736
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
737
  return numOfClosed;
738 739 740
}

/**
 * Count the rows of the current block, starting at startPos and moving in scan
 * direction, that belong to the time window ending at ekey.
 *
 * @param pQuery          query descriptor (order and current table are read)
 * @param pDataBlockInfo  block info (rows and key range)
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        start position, must be within [0, rows)
 * @param ekey            end key of the time window
 * @param searchFn        block search function
 * @param updateLastKey   when true, pQuery->current->lastKey is set to one
 *                        step past the last counted key
 * @return number of rows; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num   = -1;
  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      // the window ends inside the block
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      // the window covers the rest of the block
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

H
Haojun Liao 已提交
780 781
/**
 * Apply every output function to a run of rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param closed       whether the target time window is already closed
 * @param pWin         time window the rows belong to
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsCol        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      // remember this context's own flag so exactly that value is restored below
      bool hasPrev = pCtx[k].preAggVals.isSet;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      // startOffset is always the lowest position of the run, regardless of direction
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      // NOTE: the original value of isSet have been changed here
      if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }

      // restore it
      pCtx[k].preAggVals.isSet = hasPrev;
    }
  }
}

814
/**
 * Apply every output function to the single row at offset.
 *
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param closed       whether the target time window is already closed
 * @param pWin         time window the row belongs to
 * @param offset       row position handed to each function's xFunctionF
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }
  }
}

H
Haojun Liao 已提交
830 831
/*
 * Move *pNext to the following time window and find the row of the current data block where it starts.
 *
 * @param pRuntimeEnv     query runtime environment
 * @param pNext           in: the current window; out: the next window, re-aligned when it covers no row
 * @param pDataBlockInfo  description (time range, number of rows) of the current data block
 * @param primaryKeys     timestamps of the rows of the block
 * @param searchFn        search routine used to locate the start key inside primaryKeys
 * @param prevPosition    row position at which the previous window ended, or -1 if not available
 * @return                start row position of the next window, or -1 if the window is outside of the block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);
  const bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the next time window is not in the current block
  if (ascending) {
    if (pNext->skey > pDataBlockInfo->window.ekey) {
      return -1;
    }
  } else if (pNext->ekey < pDataBlockInfo->window.skey) {
    return -1;
  }

  // key to look for in the block, bounded by the start key of the query range
  TSKEY startKey = -1;
  if (ascending) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  int32_t startPos = 0;
  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling time window (sliding == interval): continue right after the previous end position
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * The window may not cover any data (e.g. when the time window is too small). In that case the
   * window is moved so that it contains the timestamp found at startPos.
   */
  const TSKEY firstKey = primaryKeys[startPos];
  const bool  coversNoRow = ascending ? (firstKey > pNext->ekey) : (firstKey < pNext->skey);
  if (!coversNoRow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    // units 'n' and 'y' are aligned through the time helper routines instead of plain arithmetic
    pNext->skey = taosTimeTruncate(firstKey, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (ascending) {
    // jump forward by a whole number of sliding steps
    pNext->ekey += ((firstKey - pNext->ekey + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    // jump backward by a whole number of sliding steps
    pNext->skey -= ((pNext->skey - firstKey + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
891
/*
 * Return the end key of pWindow in scan direction, limited by the end key of the query range:
 * ascending  -> min(pWindow->ekey, pQuery->window.ekey)
 * descending -> max(pWindow->skey, pQuery->window.ekey)
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
908 909
/*
 * Linear lookup of the data of the column identified by colId in the loaded data block.
 * Returns the pData pointer of the first matching column, or NULL when no column matches.
 * todo: use binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  const int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
923
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
924 925 926
  if (pDataBlock == NULL) {
    return NULL;
  }
927

H
Haojun Liao 已提交
928
  char *dataBlock = NULL;
H
Haojun Liao 已提交
929
  SQuery *pQuery = pRuntimeEnv->pQuery;
930

931
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
932
  if (functionId == TSDB_FUNC_ARITHM) {
933
    sas->pArithExpr = &pQuery->pSelectExpr[col];
934

935 936 937 938
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
939

H
Haojun Liao 已提交
940
    if (sas->data == NULL) {
H
Haojun Liao 已提交
941
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
942 943 944
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

945
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
946
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
947
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
948
      SColumnInfo *pColMsg = &pQuery->colList[i];
949

950 951 952 953 954 955 956 957
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
958

959
      assert(dataBlock != NULL);
960
      sas->data[i] = dataBlock;  // start from the offset
961
    }
962

963
  } else {  // other type of query function
964
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
965
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
966 967 968 969 970
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
971 972
    } else {
      dataBlock = NULL;
973 974
    }
  }
975

976 977 978 979
  return dataBlock;
}

/**
H
Haojun Liao 已提交
980
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
981 982
 * @param pRuntimeEnv
 * @param forwardStep
983
 * @param tsCols
984 985 986 987 988
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
989
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
990 991
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
992
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
993 994
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

995 996
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
997
  if (pDataBlock != NULL) {
998
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
999
    tsCols = (TSKEY *)(pColInfo->pData);
1000
  }
1001

1002
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1003
  if (sasArray == NULL) {
H
Haojun Liao 已提交
1004
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
1005 1006
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1007

1008
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1009
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
1010
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
1011
  }
1012

1013
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1014
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1015
    TSKEY ts = TSKEY_INITIAL_VAL;
1016

H
Haojun Liao 已提交
1017 1018 1019 1020 1021 1022 1023 1024
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1025
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1026 1027
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
1028
      taosTFree(sasArray);
H
hjxilinx 已提交
1029
      return;
1030
    }
1031

H
Haojun Liao 已提交
1032 1033 1034
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1035
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1036
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1037
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1038

1039
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1040
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1041
    }
1042

1043 1044
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1045

1046
    while (1) {
H
Haojun Liao 已提交
1047 1048
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1049 1050 1051
      if (startPos < 0) {
        break;
      }
1052

1053
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1054
      hasTimeWindow = false;
H
Haojun Liao 已提交
1055 1056
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1057 1058
        break;
      }
1059

1060 1061 1062 1063 1064
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1065
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1066

1067 1068
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1069
    }
1070

1071 1072 1073 1074 1075 1076 1077
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1078
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1079
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1080 1081 1082 1083 1084
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1085

1086 1087 1088 1089
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1090

S
Shengliang Guan 已提交
1091
    taosTFree(sasArray[i].data);
1092
  }
1093

S
Shengliang Guan 已提交
1094
  taosTFree(sasArray);
1095 1096 1097 1098 1099 1100
}

/*
 * Select (and create on demand) the result buffer of the group identified by the group-by column value
 * pData, and make it the current output buffer of all function contexts.
 *
 * @param pRuntimeEnv  query runtime environment
 * @param pData        value of the group-by column for the current row
 * @param type         data type of the value
 * @param bytes        width of the value; for binary/nchar the actual length is taken from the value itself
 * @return             TSDB_CODE_SUCCESS, or -1 if the value is NULL or no result buffer could be obtained
 *
 * float/double group-by values abort the query through longjmp (TSDB_CODE_QRY_APP_ERROR); failing to
 * allocate the group key aborts it with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // not assign result buffer yet, add new result buffer
  char* d = pData;
  int16_t len = bytes;
  if (type == TSDB_DATA_TYPE_BINARY||type == TSDB_DATA_TYPE_NCHAR) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float/binary/nchar columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true);
  if (pWindowRes == NULL) {
    return -1;
  }

  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    /*
     * This function runs once per row, so the same window result is reached again for every row of a
     * group. Reuse the previous key buffer instead of allocating a new one each time, which leaked the
     * old buffer.
     * NOTE(review): assumes pWindowRes->key is NULL until it is first assigned here -- confirm that
     * SWindowResult entries are zero-initialized.
     */
    char *key = realloc(pWindowRes->key, varDataTLen(pData));
    if (key == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pWindowRes->key = key;
    varDataCopy(pWindowRes->key, pData);
  } else {
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
      default: break;  // any other type keeps -1
    }

    pWindowRes->win.skey = v;
    pWindowRes->win.ekey = v;
  }

  assert(pRuntimeEnv->windowResInfo.interval == 0);

  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1155
/*
 * Find the data of the first non-tag group-by column that is present in the loaded data block.
 *
 * *type and *bytes are set from the query column list for every non-tag group-by column that is
 * examined, so after the call they describe the last examined column.
 * Returns the column data, or NULL when no non-tag group-by column is found in pDataBlock.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // position of the group-by column in the query column list
    int32_t colId = pGroupCol->colId;
    int16_t colIndex = -1;

    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pQuery->colList[c].colId != colId) {
        continue;
      }

      colIndex = c;
      break;
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfLoadedCols = (int32_t)taosArrayGetSize(pDataBlock);

    for (int32_t c = 0; c < numOfLoadedCols; ++c) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, c);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` with the current element of the timestamp buffer (pTSBuf).
 *
 * @return TS_JOIN_TAG_NOT_EQUALS  the tag of the first function context differs from the element tag
 *         TS_JOIN_TS_NOT_EQUALS   the row timestamp has not reached the element timestamp yet (in scan direction)
 *         TS_JOIN_TS_EQUAL        the timestamps match
 * A row timestamp that is already past the element timestamp is treated as an invariant violation (assert).
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pFirstCtx = &pRuntimeEnv->pCtx[0];

  // compare tag first
  if (tVariantCompare(&pFirstCtx->tag, &elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pFirstCtx->aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // the row is still "before" the buffered timestamp with respect to the scan direction
  bool notReached = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (notReached) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function functionId has to be invoked in the current scan.
 *
 * Rules, checked in this order:
 *  1. TSDB_FUNC_TS always runs.
 *  2. a completed result, TSDB_FUNC_TAG_DUMMY and TSDB_FUNC_TS_DUMMY never run.
 *  3. first/first_dst run only for ascending queries.
 *  4. last/last_dst run only when param[0] of the context equals the query order.
 *  5. everything else runs unless the runtime is in the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  if (pResInfo->complete || functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  const bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  const bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, none of the remaining functions need to be executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1262 1263
/*
 * Apply all output functions of the query to the current data block, one row at a time.
 *
 * Each row is first checked against the timestamp buffer (pTSBuf, if present) and the column filters.
 * A surviving row is then applied either to every time window that contains its timestamp (interval
 * query), or to the result of its group-by value / the single result (other queries).
 * lastKey of the current table is set from the last visited row.
 *
 * On allocation failure the query is finalized and aborted through longjmp with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  const bool       masterScan = IS_MASTER_SCAN(pRuntimeEnv);

  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  // the timestamp list is only available if the first loaded column is a timestamp column
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  const bool groupByColumn = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t groupType = 0;
  int16_t groupBytes = 0;
  char   *groupData = NULL;
  if (groupByColumn) {
    groupData = getGroupbyColumnData(pQuery, &groupType, &groupBytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data of the filters
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[k];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);

      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // walk through the following windows as long as they still contain the timestamp of this row
      STimeWindow nextWin = win;
      int32_t     savedIndex = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (!hasTimeWindow) {
          continue;
        }

        closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByColumn) {
        char *val = groupData + groupBytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, groupType, groupBytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (!functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          continue;
        }

        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols == NULL) {
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  } else {
    pTableInfo->lastKey = tsCols[offset] + step;
  }

  // todo refactor: extract method
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      taosTFree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1425
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1426
  SQuery *pQuery = pRuntimeEnv->pQuery;
1427

H
hjxilinx 已提交
1428 1429
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1430

H
Haojun Liao 已提交
1431
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1432
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1433
  } else {
1434
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1435
  }
1436

1437
  // update the lastkey of current table
1438
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1439
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1440

1441
  // interval query with limit applied
1442
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1443
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1444 1445
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1446
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1447

1448 1449 1450 1451
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1452

1453 1454 1455
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1456

1457 1458 1459
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1460 1461 1462 1463 1464

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1465
    }
1466
  }
1467

1468
  return numOfRes;
1469 1470
}

H
Haojun Liao 已提交
1471
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1472
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
1473

1474 1475
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1476

1477
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1478
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1479
  pCtx->aInputElemBuf = inputData;
1480

1481
  if (tpField != NULL) {
H
Haojun Liao 已提交
1482
    pCtx->preAggVals.isSet  = true;
1483 1484
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1485 1486 1487
  } else {
    pCtx->preAggVals.isSet = false;
  }
1488

H
Haojun Liao 已提交
1489 1490
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1491 1492
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1493

H
Haojun Liao 已提交
1494
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1495 1496
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1497

1498 1499
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1500
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1501
  }
1502

1503 1504 1505 1506 1507
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1508
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1509
    /*
H
Haojun Liao 已提交
1510
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1511 1512 1513 1514 1515 1516 1517 1518 1519 1520
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1521

1522 1523
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1524 1525 1526 1527 1528 1529
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1530 1531
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1532
    pInterpInfo->type = (int8_t)pQuery->fillType;
1533 1534
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1535

1536 1537 1538 1539
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1540 1541 1542
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1543 1544
      }
    }
1545
  }
1546

1547 1548 1549 1550 1551 1552
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1553
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1554 1555 1556
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1557
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1558 1559 1560 1561 1562 1563
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1564
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1565 1566
  SQuery* pQuery = pRuntimeEnv->pQuery;

1567
  if (isSelectivityWithTagsQuery(pQuery)) {
1568
    int32_t num = 0;
1569
    int16_t tagLen = 0;
1570

1571
    SQLFunctionCtx *p = NULL;
1572
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1573 1574 1575
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1576

1577
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1578
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1579

1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1593 1594 1595 1596 1597
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1598
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1599
    }
1600
  }
H
Haojun Liao 已提交
1601 1602

  return TSDB_CODE_SUCCESS;
1603 1604
}

H
Haojun Liao 已提交
1605 1606
/**
 * Hand out consecutive slices of 'buf' to the result-info entries, one slice per output
 * column. The slice of column i is pQuery->pSelectExpr[i].interBytes bytes long and is
 * bound through setResultInfoBuf().
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char *cursor = buf;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t interBytes = pQuery->pSelectExpr[col].interBytes;

    setResultInfoBuf(&pResultInfo[col], interBytes, isStableQuery, cursor);
    cursor += interBytes;  // the next column starts right after this one
  }
}

1615
/**
 * Allocate and initialize the per-output-column function contexts of a query.
 *
 * Allocates pRuntimeEnv->resultInfo (the SResultInfo array immediately followed by the
 * intermediate result buffers) and pRuntimeEnv->pCtx, fills in the input/output type
 * information, the function parameters and the output offsets of every context, and
 * finally binds the tag columns for selectivity queries.
 *
 * @param pRuntimeEnv  runtime environment to initialize; pRuntimeEnv->pQuery must be set
 * @param order        order value stored in param[2] of top/bottom/diff contexts
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY on failure. On failure both
 *         resultInfo and pCtx are released, including the parameter variants that were
 *         already created for the contexts.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the "require null" request travels in the column flag; record it and clear the bit
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type/bytes from the tag schema, the user-defined constant or the column schema
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    // copy the function arguments into the context parameters
    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // output offset of column i is the running sum of the preceding output widths
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffers start right after the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameter variants created above before dropping the contexts, the same
  // way teardownQueryRuntimeEnv() does. The context array comes from calloc(), so
  // contexts that were never initialized have numOfParams == 0 and are skipped.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/**
 * Release everything owned by the runtime environment: the time window info, the function
 * contexts (their parameter variants, tag variant and tag context list), the result info
 * buffer, the fill info, the result buffer, both query handles and the timestamp buffer.
 *
 * Does nothing when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  qDebug("QInfo:%p teardown runtime env", (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv));
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[col];

      for (int32_t k = 0; k < pFuncCtx->numOfParams; ++k) {
        tVariantDestroy(&pFuncCtx->param[k]);
      }

      tVariantDestroy(&pFuncCtx->tag);
      taosTFree(pFuncCtx->tagInfo.pTagCtxList);
    }

    // resultInfo is only released together with the context array
    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1762
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1763

H
Haojun Liao 已提交
1764
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;}
H
hjxilinx 已提交
1765

H
Haojun Liao 已提交
1766 1767 1768
/**
 * Check whether the query is treated as a fixed output query.
 *
 * Interval queries never are. Top/bottom queries and group-by-normal-column queries always
 * are. Otherwise the query is fixed output as soon as one of its functions (ignoring the
 * ts_comp column and the timestamp functions) is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[i].base;

    // the ts_comp function: a projection of the primary timestamp in the first column
    bool isTsComp = (i == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                    (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool isTsFunc = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1798
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1799
static bool isPointInterpoQuery(SQuery *pQuery) {
1800
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1801
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1802
    if (functionID == TSDB_FUNC_INTERP) {
1803 1804 1805
      return true;
    }
  }
1806

1807 1808 1809 1810
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1811
static bool isSumAvgRateQuery(SQuery *pQuery) {
1812
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1813
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1814 1815 1816
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1817

1818 1819 1820 1821 1822
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1823

1824 1825 1826
  return false;
}

H
hjxilinx 已提交
1827
// Returns true if any output expression of the query is TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool hasLastRow = false;

  for (int32_t i = 0; i < pQuery->numOfOutput && !hasLastRow; ++i) {
    hasLastRow = (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return hasLastRow;
}

H
hjxilinx 已提交
1838
/**
 * Decide whether a scan in the opposite direction is required.
 *
 * - a first/first_dst function requires it when the query is not an ascending query;
 * - for the first last/last_dst function met, the answer is whether the order stored in
 *   its first argument differs from the query order (later expressions are not inspected);
 * - TS, TS_DUMMY and TAG expressions are skipped.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[i].base;
    int32_t      funcId = pFunc->functionId;

    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY || funcId == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST) || (funcId == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (funcId == TSDB_FUNC_LAST) || (funcId == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t funcOrder = (int32_t)pFunc->arg[0].argValue.i64;
      return funcOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1857

H
Haojun Liao 已提交
1858 1859 1860 1861
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1862 1863
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1864 1865 1866
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1867 1868 1869 1870

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1871
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1872 1873 1874
      return false;
    }
  }
1875

H
hjxilinx 已提交
1876 1877 1878
  return true;
}

1879 1880
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1881
/**
 * Compute the interval-aligned time window that contains 'key'.
 *
 * win->skey is 'key' truncated by taosTimeTruncate() according to the query interval and
 * precision; win->ekey is the last timestamp of that window.
 *
 * @param key       timestamp to locate; must lie within [keyFirst, keyLast]
 * @param keyFirst  lower bound of the query range
 * @param keyLast   upper bound of the query range
 * @param win       [out] resulting time window
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst);
  assert(key <= keyLast);
  assert(pQuery->interval.sliding <= pQuery->interval.interval);

  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * if keyFirst > INT64_MAX - pQuery->interval.interval, the query duration between
   * keyFirst and keyLast must be less than one interval. Therefore, no need to adjust the query ranges.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  // the 'n' and 'y' interval units go through taosTimeAdd() instead of plain addition
  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/**
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and group-by-normal-column queries. Otherwise it is
 * 1 only when the query has at least one function other than TS/TS_DUMMY and all of those
 * functions are multi-output functions.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  int32_t checkBuffer = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[funcId].nStatus)) {
      // one single-output function is enough to disable the buffer check
      checkBuffer = 0;
      break;
    }

    checkBuffer = 1;
  }

  pQuery->checkBuffer = checkBuffer;
}

/*
 * todo add more parameters to check soon..
 */
1925
bool colIdCheck(SQuery *pQuery) {
1926 1927
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1928
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1929
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1930 1931 1932
      return false;
    }
  }
1933

1934 1935 1936 1937 1938 1939
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
//
// Returns true if every output expression, apart from the TS/TS_DUMMY/TAG/TAG_DUMMY
// ones, is either functId or functIdDst.
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t id = pQuery->pSelectExpr[i].base.functionId;

    bool isAuxiliary = (id == TSDB_FUNC_TS) || (id == TSDB_FUNC_TS_DUMMY) || (id == TSDB_FUNC_TAG) ||
                       (id == TSDB_FUNC_TAG_DUMMY);
    bool isExpected = (id == functId) || (id == functIdDst);

    if (!isAuxiliary && !isExpected) {
      return false;
    }
  }

  return true;
}

// True if the query consists of first/first_dst functions only (see onlyOneQueryType).
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// True if the query consists of last/last_dst functions only (see onlyOneQueryType).
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1960
// todo refactor, add iterator
1961 1962
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1963
  for(int32_t i = 0; i < t; ++i) {
1964
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1965 1966 1967

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1968
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1969

1970 1971 1972 1973
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1974 1975 1976 1977
    }
  }
}

1978
/**
 * Force the scan order of the query to 'newOrder'.
 *
 * If the current scan direction does not match 'newOrder', the change is logged and the
 * start/end keys of the query window are exchanged; with 'exchangeLastKey' set, the lastKey
 * of the tables is adjusted through doExchangeTimeWindow() as well. The order of the query
 * is set to 'newOrder' in any case.
 *
 * @param queryType  label of the query kind, used in the log message only
 */
static void doSwitchScanOrder(SQInfo *pQInfo, const char *queryType, int32_t newOrder, bool exchangeLastKey) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool isAsc = QUERY_IS_ASC_QUERY(pQuery);
  bool needSwitch = (newOrder == TSDB_ORDER_ASC) ? !isAsc : isAsc;

  if (needSwitch) {
    qDebug("QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
           "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64,
           pQInfo, queryType, pQuery->order.order, newOrder, pQuery->window.skey, pQuery->window.ekey,
           pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    if (exchangeLastKey) {
      doExchangeTimeWindow(pQInfo, &pQuery->window);
    }
  }

  pQuery->order.order = newOrder;
}

/**
 * Adjust the scan order (and the query time range along with it) for the query kinds whose
 * scan direction is fixed, regardless of the order requested:
 *
 *  - last_row query:                  ascending
 *  - group by normal column (desc):   ascending
 *  - point interpolation, no interval: ascending
 *  - only first/first_dst functions:  ascending
 *  - only last/last_dst functions:    descending
 *
 * For interval queries the first/last adjustment is applied to super table queries only.
 * The QInfo address written to the log is pQInfo itself.
 *
 * @param pQInfo       query info that owns the query
 * @param pQueryMsg    not used by this function
 * @param stableQuery  true if the query is a super table query
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    doSwitchScanOrder(pQInfo, "interp", TSDB_ORDER_ASC, false);
    return;
  }

  bool isIntervalQuery = (pQuery->interval.interval != 0);
  if (isIntervalQuery && !stableQuery) {
    return;  // interval query on a single table keeps the requested order
  }

  if (onlyFirstQuery(pQuery)) {
    doSwitchScanOrder(pQInfo, isIntervalQuery ? "only-first stable" : "only-first", TSDB_ORDER_ASC, true);
  } else if (onlyLastQuery(pQuery)) {
    doSwitchScanOrder(pQInfo, isIntervalQuery ? "only-last stable" : "only-last", TSDB_ORDER_DESC, true);
  }
}

/**
 * Number of pages to start with:
 *  - 128 for a group-by-normal-column query;
 *  - for an interval query, the number of tables in tableqinfoGroupInfo, but at least 16;
 *  - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t numOfPages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(numOfPages > 0);
  return numOfPages;
}

2088 2089
/**
 * Compute the page size and the row size of the intermediate result buffer.
 *
 * @param ps       [out] page size: DEFAULT_INTERN_BUF_PAGE_SIZE, doubled until a page
 *                 (minus the tFilePage header) holds at least MIN_ROWS_PER_PAGE rows
 * @param rowsize  [out] pQuery->rowSize multiplied by GET_ROW_PARAM_FOR_MULTIOUTPUT()
 *
 * pRuntimeEnv->numOfRowsPerPage is updated accordingly.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t MIN_ROWS_PER_PAGE = 4;

  int32_t rowBytes = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));
  int32_t overhead = sizeof(tFilePage);

  // one page contains at least MIN_ROWS_PER_PAGE rows
  int32_t pageSize = DEFAULT_INTERN_BUF_PAGE_SIZE;
  for (; (rowBytes * MIN_ROWS_PER_PAGE) > pageSize - overhead; pageSize = (pageSize << 1u)) {
  }

  *rowsize = rowBytes;
  *ps = pageSize;

  pRuntimeEnv->numOfRowsPerPage = (pageSize - sizeof(tFilePage)) / rowBytes;
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2105
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2106

H
Haojun Liao 已提交
2107 2108 2109 2110
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
2111 2112 2113 2114 2115
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
2116

H
Haojun Liao 已提交
2117 2118 2119 2120 2121 2122 2123 2124
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

2125
    // no statistics data, load the true data block
H
Haojun Liao 已提交
2126
    if (index == -1) {
H
Haojun Liao 已提交
2127
      return true;
2128
    }
2129

2130
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
2131
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
H
Haojun Liao 已提交
2132
      return true;
2133
    }
2134

2135
    // all data in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
2136
    if (pDataStatis[index].numOfNull == numOfRows) {
2137 2138 2139 2140 2141 2142 2143 2144 2145

      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
        if (pFilterElem->fp == isNull_filter) {
          return true;
        }
      }

2146 2147
      continue;
    }
2148

H
Haojun Liao 已提交
2149 2150 2151
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
S
TD-1057  
Shengliang Guan 已提交
2152 2153
      float minval = (float)(*(double *)(&pDataBlockst->min));
      float maxval = (float)(*(double *)(&pDataBlockst->max));
2154

2155 2156 2157 2158 2159 2160 2161
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
2162
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
2163 2164 2165 2166 2167
          return true;
        }
      }
    }
  }
2168

H
Haojun Liao 已提交
2169 2170 2171 2172 2173 2174 2175 2176
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
2177

H
Haojun Liao 已提交
2178
  return false;
2179 2180
}

H
Haojun Liao 已提交
2181 2182 2183 2184 2185 2186 2187 2188
/**
 * Check whether a time window boundary of the interval query falls inside the data block,
 * i.e. whether the block is not completely contained by a single time window.
 *
 * Starting from the aligned window of the first key in scan order, the windows are advanced
 * with GET_NEXT_TIMEWINDOW() until they have moved past the block.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  TSKEY blockSkey = pBlockInfo->window.skey;
  TSKEY blockEkey = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockSkey, sk, ek, &w);
    assert(w.ekey >= blockSkey);

    // the first window ends inside the block
    if (w.ekey < blockEkey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.skey > blockEkey) {
        return false;
      }

      assert(w.ekey > blockEkey);

      // w.skey <= blockEkey holds at this point, a window starts inside the block
      if (w.skey > blockSkey) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, blockEkey, sk, ek, &w);
  assert(w.skey <= blockEkey);

  // the first window starts inside the block
  if (w.skey > blockSkey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &w);
    if (w.ekey < blockSkey) {
      return false;
    }

    assert(w.skey < blockSkey);

    // w.ekey >= blockSkey holds at this point, a window ends inside the block
    if (w.ekey < blockEkey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2230
/**
 * Load the statistics and/or the data of the current block, but only as far as the query
 * actually needs them.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pWindowResInfo  window result info, used to bind the output buffer of interval queries
 * @param pQueryHandle    tsdb query handle positioned at the block
 * @param pBlockInfo      information of the current block
 * @param pStatis         [out] block statistics, if they were retrieved
 * @param pDataBlock      [out] block data, if it was loaded
 * @param status          [out] BLK_DATA_NO_NEEDED, BLK_DATA_STATIS_NEEDED, BLK_DATA_ALL_NEEDED
 *                        or BLK_DATA_DISCARD
 * @return TSDB_CODE_SUCCESS, or terrno if the data block could not be retrieved
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // ask every function what it needs from this block, stop once one needs all data
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // report a failed load the same way the full-load path below does
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    // NOTE(review): the block is still loaded and counted after it has been marked as
    // BLK_DATA_DISCARD above - confirm whether callers rely on *pDataBlock in that case.
    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2315
/**
 * Binary search for 'key' in a list of 'num' timestamps.
 *
 * @param pValue  the list, read as an array of TSKEY (the search assumes it is sorted ascending)
 * @param num     number of timestamps in the list
 * @param key     timestamp to look for
 * @param order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return for TSDB_ORDER_DESC the position of the last element that is <= key
 *         (firstPos - 1, possibly -1, when the key is below the remaining range);
 *         for TSDB_ORDER_ASC the position of the first element that is >= key, or -1 if the
 *         key is beyond the last element of the list. -1 is returned for an empty list.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keyList = (TSKEY *)pValue;
  int32_t firstPos = 0;
  int32_t lastPos = num - 1;
  int32_t midPos = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keyList[lastPos]) return lastPos;
      if (key == keyList[firstPos]) return firstPos;
      if (key < keyList[firstPos]) return firstPos - 1;

      midPos = firstPos + ((lastPos - firstPos + 1) >> 1);

      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        return midPos;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keyList[firstPos]) return firstPos;
    if (key == keyList[lastPos]) return lastPos;

    if (key > keyList[lastPos]) {
      // the answer is the element right after the current range, if there is one
      int32_t nextPos = lastPos + 1;
      return (nextPos >= num) ? -1 : nextPos;
    }

    midPos = firstPos + ((lastPos - firstPos + 1) >> 1);

    if (key < keyList[midPos]) {
      lastPos = midPos - 1;
    } else if (key > keyList[midPos]) {
      firstPos = midPos + 1;
    } else {
      return midPos;
    }
  }
}

2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390
/*
 * Grow every output column buffer of the query so that it can hold `capacity` rows.
 *
 * Nothing is done when `capacity` is smaller than the current capacity. Otherwise each
 * pQuery->sdata[i] is reallocated, the matching pCtx[i].aOutputBuf is pointed at the
 * start of the column data, and pQuery->rec.capacity is updated.
 *
 * On allocation failure the function does not return: it longjmps to pRuntimeEnv->env
 * with TSDB_CODE_QRY_OUT_OF_MEMORY (the previous buffer is left untouched by realloc).
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity may not fit in a 32-bit signed integer
    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2406 2407 2408
/*
 * Make sure the output buffers can take all rows of the data block described by
 * pBlockInfo, in addition to the rows already stored.
 *
 * Only applies when the query is not an interval query, not a group-by-normal-column
 * query, not a fixed-output query and not a ts-comp query (e.g. prj/diff queries).
 * When the free space is too small, every column buffer is enlarged by exactly the
 * missing number of rows, the newly exposed area is zeroed, and the output pointers
 * in pCtx are re-based onto the (possibly moved) buffers.
 *
 * On allocation failure the function longjmps to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // compute the size in size_t: bytes * newSize may not fit in a 32-bit signed integer
    size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that are already in the buffer
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // these functions also write a timestamp, which goes to the output buffer of the first column
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465
/*
 * For an interval query whose first time window has not been fixed yet
 * (windowResInfo.prevSKey still equals TSKEY_INITIAL_VAL), derive the aligned
 * window from the given data block and record it in the window result info.
 * Does nothing in every other case.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    TSKEY blockStart = pBlockInfo->window.skey;
    getAlignQueryTimeWindow(pQuery, blockStart, blockStart, pQuery->window.ekey, &win);

    pInfo->startTime = win.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    TSKEY blockEnd = pBlockInfo->window.ekey;
    getAlignQueryTimeWindow(pQuery, blockEnd, pQuery->window.ekey, blockEnd, &win);

    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = win.skey;
}

2466 2467
/*
 * Iterate over the data blocks delivered by the query handle that matches the current
 * scan (primary handle for the master scan, secondary handle otherwise) and apply the
 * query functions to each of them.
 *
 * The loop stops when there are no more blocks, when loading a block fails, or when the
 * query status reports a full result buffer / completion. A pending terrno or a killed
 * query aborts through longjmp to pRuntimeEnv->env. Always returns 0 otherwise.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SQueryCostInfo * pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT handle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  } else {
    handle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo block = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &block);
    doSetInitialTimewindow(pRuntimeEnv, &block);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &block);

    SDataStatis *pStatis = NULL;
    SArray *     pCols = NULL;
    uint32_t     loadStatus = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, handle, &block, &pStatis, &pCols, &loadStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (loadStatus == BLK_DATA_DISCARD) {
      // the block is skipped entirely: move the last key just past its far edge
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? block.window.ekey : block.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = block.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &block, pStatis, binarySearchForKey, pCols);

    pCost->totalRows += block.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           block.window.skey, block.window.ekey, block.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2546
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2547
  tVariantDestroy(tag);
2548

2549
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2550
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2551
    assert(val != NULL);
2552

H
[td-90]  
Haojun Liao 已提交
2553
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2554
  } else {
2555
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2556 2557 2558 2559
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2560

H
hjxilinx 已提交
2561
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2562
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2563 2564 2565 2566
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2567
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2568
    } else {
H
Haojun Liao 已提交
2569 2570 2571 2572 2573
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2574
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2575
    }
2576
  }
2577 2578
}

2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590
/*
 * Linear lookup of a tag column by its column id.
 * Returns a pointer into pTagColList, or NULL when no entry has the requested id.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t idx = 0;
  while (idx < numOfTags) {
    SColumnInfo* pCol = &pTagColList[idx];
    if (pCol->colId == colId) {
      return pCol;
    }

    idx += 1;
  }

  return NULL;
}

2591
/*
 * Populate the tag values in the function contexts (pRuntimeEnv->pCtx[]) for the given table.
 *
 *  - A query consisting of a single TSDB_FUNC_TS_COMP expression only needs the tag named
 *    by the expression's first argument, stored in pCtx[0].tag.
 *  - Otherwise every output expression that refers to a tag column gets its tag value, and,
 *    when a ts buffer exists (pTSBuf != NULL) and the first expression is a ts/prj function
 *    on the primary timestamp column, pCtx[0].tag additionally receives the tag named by
 *    that expression's first argument (used for join comparison).
 *
 * The tag column named by the expression argument must exist in pQuery->tagColList.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
    assert(pColInfo != NULL);  // the lookup returns NULL for an unknown column id

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];

    // ts_comp column required the tag value for join filter
    if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
      continue;
    }

    // todo use tag column index to optimize performance
    doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                         pLocalExprInfo->type, pLocalExprInfo->bytes);
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
  if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
      pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    assert(pFuncMsg->numOfParams == 1);

    int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
    assert(pColInfo != NULL);  // the lookup returns NULL for an unknown column id

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
        pRuntimeEnv->pCtx[0].tag.i64Key);
  }
}

/*
 * Feed one window result into the merge functions of every output column.
 *
 * timestamp  : start timestamp handed to each function context
 * pWindowRes : the window result to merge; its data lives in a page of the result buffer
 * mergeFlag  : true  -> merge into the current output row
 *              false -> advance every output pointer by one row and re-initialise the
 *                       function state before merging
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  // pass 1: prepare the input of every column
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf = pc->aOutputBuf + pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[funcId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes, page);

    // in case of tag column, the tag information should be extracted from input buffer
    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    tVariantDestroy(&pc->tag);

    int32_t outType = pc->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // pass 2: run the merge function of every column except the dummy tag columns
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtx[col]);
    }
  }
}

2678
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2747
/*
 * Debug helper: dump the intermediate results of a super table query to stdout,
 * one line per row and one tab-separated field per output column.
 *
 * pdata     : one page per output column; cell (row j, column i) starts at
 *             pdata[i]->data + bytes(i) * j
 * numOfRows : number of rows to print
 *
 * Columns whose output type is not handled below are silently skipped.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      int32_t colType = pQuery->pSelectExpr[col].type;
      char *  cell = pdata[col]->data + pQuery->pSelectExpr[col].bytes * row;

      if (colType == TSDB_DATA_TYPE_BINARY) {
        printBinaryData(pQuery->pSelectExpr[col].base.functionId, cell, colType);
      } else if (colType == TSDB_DATA_TYPE_TIMESTAMP || colType == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)cell);
      } else if (colType == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)cell);
      } else if (colType == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)cell);
      } else if (colType == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)cell);
      }
    }
    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2782 2783 2784
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2785 2786 2787 2788 2789
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2790

2791 2792
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2793

2794 2795
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2796

2797 2798 2799 2800
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2801

2802 2803 2804 2805
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2806

H
hjxilinx 已提交
2807
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2808
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2809
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2810

H
Haojun Liao 已提交
2811
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2812
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2813

H
hjxilinx 已提交
2814
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2815
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2816
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2817

H
Haojun Liao 已提交
2818
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2819
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2820

2821 2822 2823
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2824

2825 2826 2827
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2828
/*
 * Merge the per-table results group by group, starting at pQInfo->groupIndex, until one
 * group yields at least one result page or all groups have been processed.
 *
 * Marks the super table query as over when the last group has been handled and no
 * unread page is left. The elapsed time is added to the first-stage merge cost.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when merging a group fails.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t totalGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < totalGroups) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pTables);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo* pResInfo = &pQInfo->groupResInfo;
  bool allGroupsDone = (pQInfo->groupIndex == totalGroups);
  if (allGroupsDone && pResInfo->pos.pageId == pResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t costUs = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, totalGroups, costUs);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += costUs;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the merged results of the current group from the disk-based result buffer into
 * the query output buffers (pQuery->sdata), at most pQuery->rec.capacity rows.
 *
 * The read position (page id + row id inside the page) is kept in pQInfo->groupResInfo,
 * so a page that does not fit completely is continued on the next call. When all pages
 * of the current group have been consumed, the next group is merged first; if no group
 * is left the super table query is marked as over.
 *
 * On return pQuery->rec.rows holds the number of rows copied. The function returns
 * without copying anything when merging the next group fails.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pos.pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->pos.rowId < pData->num);

    // first unread row of this page; non-zero when the previous call stopped inside the page
    int32_t startRow = (int32_t)pGroupResInfo->pos.rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);
    int32_t numOfCopiedRows = 0;

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer is exhausted: take what fits and remember where to resume
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->pos.rowId += numOfCopiedRows;
      done = true;
    } else {
      // only the unread remainder of the page is copied, not the whole page again
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pos.pageId += 1;
      pGroupResInfo->pos.rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page; skip the rows already delivered
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + (size_t)startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2936
/*
 * Number of result rows held by a window result.
 *
 * The count is taken from the first output column that is not a ts/tag/tagprj function
 * and that reports a positive numOfRes; 0 is returned when there is no such column.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool isAuxiliary = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);
    if (!isAuxiliary) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }

    col += 1;
  }

  return 0;
}

2959
/*
 * Merge the window results of all tables in one group, ordered by window start
 * timestamp, and store the merged rows as pages of the disk-based result buffer.
 *
 *  - No table of the group has data      : returns 0.
 *  - Exactly one table has data          : nothing is merged; pQInfo->groupResInfo is pointed
 *                                          at the pages of that table and the page count is returned.
 *  - Otherwise                           : a loser tree merges the tables; window results with
 *                                          the same timestamp are combined through doMerge.
 *
 * Returns the number of data pages of the group, or -1 when flushing to the result
 * buffer fails. On out-of-memory or when the query has been killed the function does
 * not return but longjmps to pRuntimeEnv->env.
 *
 * All temporary allocations are released on every exit path, including the longjmp ones.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  // everything released or inspected at _cleanup is declared before the first goto
  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;
  char *          buf = NULL;
  SIDList         pageList = NULL;
  int32_t         numOfTables = 0;
  int32_t         tid = -1;
  int64_t         lastTimestamp = -1;
  int64_t         startt = 0;
  int64_t         endt = 0;
  int32_t         result = -1;                   // value returned when no longjmp is requested
  int32_t         jmpCode = TSDB_CODE_SUCCESS;   // any other value: longjmp with it after the cleanup

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);
  SCompSupporter    cs = {pTableList, posList, pQInfo};

  if (pTableList == NULL || posList == NULL) {
    qError("QInfo:%p failed alloc memory", pQInfo);
    jmpCode = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

  // todo opt for the case of one table per group
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    result = 0;
    goto _cleanup;
  }

  // no need to merge results since only one table in each group
  if (numOfTables == 1) {
    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    result = pGroupResInfo->numOfDataPages;
    goto _cleanup;
  }

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {  // the tree is dereferenced below, it must exist
    qError("QInfo:%p failed alloc memory", pQInfo);
    jmpCode = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pResultInfo == NULL) {
    jmpCode = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

  buf = calloc(1, pRuntimeEnv->interBufSize);
  if (buf == NULL) {
    jmpCode = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);
      jmpCode = TSDB_CODE_TSC_QUERY_CANCELLED;
      goto _cleanup;
    }

    // the winner of the loser tree is the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window result: just step over it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            goto _cleanup;  // result is still -1
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto _cleanup;  // result is still -1
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  result = pQInfo->groupResInfo.numOfDataPages;

_cleanup:
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);
  taosTFree(pResultInfo);
  taosTFree(buf);

  if (jmpCode != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, jmpCode);
  }

  return result;
}

H
Haojun Liao 已提交
3143 3144
/*
 * Copy the rows buffered in pQuery->sdata into newly requested pages of the
 * disk-based result buffer, writing at most pResultBuf->numOfRowsPerPage rows
 * into each page.
 *
 * pRuntimeEnv    supplies the query, the per-column contexts/offsets and the result buffer
 * pGroupResInfo  its groupId is handed to getNewDataBuf for every page, and its
 *                numOfDataPages is incremented once per page written
 *
 * Returns TSDB_CODE_SUCCESS when every buffered row has been copied, or
 * TSDB_CODE_QRY_OUT_OF_MEMORY when a page could not be obtained. Pages written
 * before such a failure stay counted in numOfDataPages.
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;  // passed by address to getNewDataBuf; the value is not read here
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  // NOTE(review): the page capacity comes from pResultBuf while the column stride below uses
  // pRuntimeEnv->numOfRowsPerPage; presumably both hold the same value - confirm.
  int32_t remain = (int32_t) pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t rows = (remain > capacity)? capacity:remain;
    assert(rows > 0);

    // get the output buffer page
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    if (buf == NULL) {
      // do not dereference a missing page; callers already treat any non-success code as failure
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = rows;

    // copy the current slice of every output column into its region of the page
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      char* output = buf->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage;
      char* src = ((char *) pQuery->sdata[i]->data) + offset * bytes;
      memcpy(output, src, (size_t)(buf->num * bytes));
    }

    offset += rows;
    remain -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output state of every output column: the context's output
 * pointer is placed one row (outputBytes) before the start of the column's
 * sdata buffer, size/startOffset are reset, the context is bound to its slot
 * in pResultInfo, and the buffered row count is cleared.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    SQLFunctionCtx *pc = &pCtx[col];

    // intentionally starts one row ahead of the buffer
    // NOTE(review): presumably the merge step advances the pointer before writing - confirm
    pc->aOutputBuf  = pQuery->sdata[col]->data - pc->outputBytes;
    pc->size        = 1;
    pc->startOffset = 0;
    pc->resultInfo  = &pResultInfo[col];

    pQuery->sdata[col]->num = 0;
    ++col;
  }
}

3192 3193 3194 3195
/*
 * Flip the time window and cursor of one table so it can be scanned in the
 * opposite direction. A NULL pTableQueryInfo is ignored.
 *
 * The direction factor is taken from pQuery->order.order, which the caller has
 * already switched to the reverse order.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO: validate (and then assert) that win.ekey lies on the expected side of lastKey + step
  // for the current scan order.

  // when lastKey never moved away from win.skey there are no results, and ekey is left untouched
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  // the reverse scan starts where the window used to end
  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * For every closed window result in pWindowResInfo, adjust the 'complete' flag
 * of each output function ahead of a reverse scan:
 *   - FIRST/FIRST_DST when order is ASC, and LAST/LAST_DST when order is DESC, are marked not complete;
 *   - every other function except TS and TAG is marked complete;
 *   - TS and TAG are left unchanged.
 * Windows that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!getTimeWindowResStatus(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t functId = pQuery->pSelectExpr[k].base.functionId;

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pRes->resultInfo[k].complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pRes->resultInfo[k].complete = true;
      }
    }
  }
}

3248 3249
/*
 * Prepare the 'complete' flags of the output functions for a reverse scan.
 *
 * Group-by-normal-column and interval queries delegate to
 * disableFuncInReverseScanImpl on the runtime window results. Otherwise the
 * flags are adjusted directly on each function context, using the same rule:
 * FIRST/FIRST_DST (ASC order) and LAST/LAST_DST (DESC order) become not
 * complete, all others except TS and TAG become complete.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple result of a table query: one result info per function context
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {  // todo refactor
    int32_t functId = pQuery->pSelectExpr[k].base.functionId;

    SResultInfo *pResInfo = pRuntimeEnv->pCtx[k].resultInfo;
    if (pResInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pResInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pResInfo->complete = true;
    }
  }
}

/*
 * Walk every table of every table group, flip its query window for the reverse
 * scan, and copy the resulting lastKey into the matching STableKeyInfo entry of
 * pQInfo->tableGroupInfo.pGroupList.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeys = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pTables);
    for (int32_t k = 0; (size_t) k < numOfTables; ++k) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(pTables, k);
      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pTableKeyInfo = taosArrayGet(pKeys, k);
      pTableKeyInfo->lastKey = pCheckInfo->lastKey;

      // both lists are expected to describe the same table at the same position
      assert(pCheckInfo->pTable == pTableKeyInfo->pTable);
    }
  }
}

3299
/* Apply SWITCH_ORDER to the 'order' field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t k = 0;
  while (k < pQuery->numOfOutput) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[k].order);
    ++k;
  }
}

H
Haojun Liao 已提交
3306
/*
 * Allocate the per-column SResultInfo array of a window result together with
 * its intermediate buffer in one zeroed allocation, laid out as:
 *
 *   [ numOfOutput * SResultInfo | interBufSize bytes ]
 *
 * On success the position is reset to {-1, -1} and setWindowResultInfo is
 * called with the start of the trailing buffer.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when calloc fails
 * (pResultRow->pos is not touched in that case).
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer follows the SResultInfo array
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every function context back at the start of its sdata output buffer,
 * reset and re-attach the runtime result info, zero the output buffer, and
 * finally run initCtxOutputBuf.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[k];
    SResultInfo    *pResInfo = &pRuntimeEnv->resultInfo[k];

    pCtx->aOutputBuf = pQuery->sdata[k]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(pResInfo);
    pCtx->resultInfo = pResInfo;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    switch (functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[k]->data, 0, (size_t)(pQuery->pSelectExpr[k].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output positions of the function contexts by 'output' rows and
 * reset each context's result info.
 *
 * Only functions whose aAggs status satisfies IS_OUTER_FORWARD have their
 * aOutputBuf moved. TOP/BOTTOM additionally move ptsOutputBuf by
 * TSDB_KEYSIZE * output. DIFF must not appear here (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[k];

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    bool usesTsOutput = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (usesTsOutput) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and call the function's
 * init routine for contexts whose result info is not yet initialized.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[k];
    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3394
/*
 * Apply the remaining OFFSET (pQuery->limit.offset) to the rows currently held
 * in the output buffers.
 *
 *  - No rows, or no offset left: nothing to do.
 *  - Rows <= offset: all rows are dropped, the offset is reduced by the number
 *    of rows, the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 *  - Rows > offset: the first 'offset' rows are removed by shifting the
 *    remaining rows to the front of each column buffer, the output pointers are
 *    placed right after the remaining rows, and the offset becomes 0.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // more rows than the offset: drop the leading rows and keep the rest
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[k];

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    int32_t bytes = pCtx->outputBytes;

    char *base = (char*) pQuery->sdata[k]->data;

    // source and destination may overlap, hence memmove
    memmove(base, base + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pCtx->aOutputBuf = base + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update pQuery->status. QUERY_NOT_COMPLETED replaces the whole status; any
 * other value first clears QUERY_NOT_COMPLETED and is then OR-ed into the
 * existing status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Call xNextStep on every output function except TSDB_FUNC_TS, using the
 * currently bound contexts. Returns true when at least one of those contexts
 * has a result info that is not complete afterwards. Every function is
 * visited; there is no early exit.
 */
static bool advanceAggsToNextStep(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    pending = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int16_t functId = pQuery->pSelectExpr[k].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[k]);
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);

    if (!pResInfo->complete) {
      pending = true;
    }
  }

  return pending;
}

/*
 * Decide whether the data blocks have to be scanned once more.
 *
 * For group-by-normal-column and interval queries each closed window result is
 * bound to the contexts and advanced; otherwise the contexts are advanced once
 * as they are. Returns true if any function reports an incomplete result.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    return advanceAggsToNextStep(pRuntimeEnv);
  }

  bool toContinue = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (!pResult->closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pResult);

    if (advanceAggsToNextStep(pRuntimeEnv)) {
      toContinue = true;
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3490
/*
 * Capture the state needed to restore the query after repeat/reverse scans:
 * the query status, the current window index, the start key, the query window
 * (info.w) and the current table's window with its skey replaced by 'start'
 * (info.curWindow).
 *
 * Asserts that 'start' does not lie beyond the table's lastKey in the scan direction.
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pTableQueryInfo->lastKey);
  } else {
    assert(start >= pTableQueryInfo->lastKey);
  }

  SQueryStatusInfo info = {
      .lastKey     = start,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .status      = pQuery->status,
  };

  TIME_WINDOW_COPY(info.w, pQuery->window);
  TIME_WINDOW_COPY(info.curWindow, pTableQueryInfo->win);

  // the saved current window starts at the requested key
  info.curWindow.skey = start;

  return info;
}

3511 3512 3513 3514
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * Saves the ts-buffer cursor in pStatus->cur, reverses the ts-buffer order (if
 * a ts-buffer exists), sets the query window to the swapped pStatus->curWindow,
 * switches the query and context orders, prepares the function flags and the
 * per-table ranges, and opens a new secondary tsdb query handle (releasing the
 * previous one, if any).
 *
 * Does not return if tsdbQueryTables fails: it longjmps to pRuntimeEnv->env with terrno.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch, the window must run in the new scan direction
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3560 3561
/*
 * Undo the reverse-scan setup: switch the query and context orders back,
 * restore the ts-buffer cursor saved in pStatus, return to master-scan mode and
 * restore the saved lastKey, status and time window on the current table and
 * on the query.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // update the pQuery->window.skey and pQuery->window.ekey to limit the scan scope of sliding query during reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3582 3583 3584 3585 3586 3587 3588
/*
 * Set the lastKey of the single table in pTableGroupInfo (first entry of the
 * first group) to the start key of the condition's time window.
 * Asserts that the group info holds exactly one table.
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3589
/*
 * Scan the data blocks of the current table starting from 'start'.
 *
 * Phase 1 (loop): run the scan; while needScanDataBlocksAgain reports pending
 * functions, open a new secondary query handle over the saved current window
 * and repeat the scan with scanFlag = REPEAT_SCAN.
 * Phase 2: if needReverseScan(pQuery) holds, set up the reverse-scan
 * environment, scan once more, and restore the environment.
 *
 * Leaves via longjmp(pRuntimeEnv->env, ...) when a secondary query handle
 * cannot be created or when the query has been killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (qstatus.lastKey != pTableQueryInfo->lastKey) {
        qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    // another pass is required: rebuild the secondary handle over the saved window
    STsdbQueryCond cond = {
        .numOfCols = pQuery->numOfCols,
        .colList   = pQuery->colList,
        .order     = pQuery->order.order,
    };

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3670
/*
 * Run xFinalize for every output function.
 *
 * For group-by-normal-column and interval queries this is done per closed
 * window result (for group-by-normal-column all windows are closed first), and
 * the number of result rows is stored in each window result. For all other
 * queries the functions are finalized once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      aAggs[pQuery->pSelectExpr[k].base.functionId].xFinalize(&pRuntimeEnv->pCtx[k]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pRes = &pWindowResInfo->pResult[w];
    setWindowResOutputBuf(pRuntimeEnv, pRes);

    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      aAggs[pQuery->pSelectExpr[k].base.functionId].xFinalize(&pRuntimeEnv->pCtx[k]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Returns true when at least one output function is something other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG or TSDB_FUNC_TAGPRJ; false otherwise.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    switch (pQuery->pSelectExpr[k].base.functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
        break;  // auxiliary output, keep looking
      default:
        return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3718
/*
 * Initialize an STableQueryInfo inside the caller-provided memory 'buf'.
 *
 * The window and lastKey are taken from 'win', the table handle from 'pTable',
 * and the cursor's vgroupIndex is set to -1. For interval and
 * group-by-normal-column queries the window result info is initialized as
 * well; other queries leave it untouched.
 *
 * Returns 'buf' as STableQueryInfo*, or NULL when initWindowResInfo fails.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;

  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  // only interval/groupby queries need the window result info
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    int32_t initialSize = 16;
    int32_t initialThreshold = 100;

    if (initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT) !=
        TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3743
/* Release the window result info owned by pTableQueryInfo; a NULL argument is ignored. */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3754
 * @param pDataBlockInfo
3755
 */
H
Haojun Liao 已提交
3756
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3757
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3758 3759 3760
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3761 3762
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3763 3764 3765 3766

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3767

H
Haojun Liao 已提交
3768 3769 3770
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3771

3772 3773
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3774 3775 3776
  if (pWindowRes == NULL) {
    return;
  }
3777

3778 3779 3780 3781 3782
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3783
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3784 3785 3786 3787
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3788

H
Haojun Liao 已提交
3789 3790
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3791 3792 3793 3794
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3795
/*
 * Bind every function context to the given window result: the output pointer
 * is set to the column's position inside the result's buffer page, TOP/BOTTOM/
 * DIFF take their timestamp output from the first column's output pointer, the
 * result info is attached, and its superTableQ flag is copied from the runtime
 * environment.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[k];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, k, pResult, pPage);

    switch (pQuery->pSelectExpr[k].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[k];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3822 3823
/*
 * Bind the function contexts to the given window result and initialize the
 * functions that have not been initialized yet.
 *
 * The result info is always attached. Contexts whose result info is both
 * initialized and complete are otherwise left untouched; for the rest the
 * output pointers are set, currentStage is reset to 0, the superTableQ flag is
 * copied, and init is called when the result info is not initialized.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[k];

    pCtx->resultInfo = &pResult->resultInfo[k];
    if (pCtx->resultInfo->initialized && pCtx->resultInfo->complete) {
      continue;  // already finished for this window
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, k, pResult, pPage);
    pCtx->currentStage = 0;

    int32_t functionId = pCtx->functionId;
    bool needTsOutput =
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;     // set super table query flag

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3856
/*
 * Set the tag values for the table and, when a ts-buffer exists, position the
 * ts-buffer for this table: on first use (cur.vgroupIndex == -1) the start
 * position is looked up by tag and the resulting cursor is remembered in
 * pTableQueryInfo; afterwards the remembered cursor is restored.
 *
 * Always returns 0.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  tVariantAssign(&pTableQueryInfo->tag, &pRuntimeEnv->pCtx[0].tag);
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pTableQueryInfo->tag);

  // keep the cursor info of current meter
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3886
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3887 3888
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3889
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3890

3891 3892 3893
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3894
    pTableQueryInfo->win.skey = key;
3895
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3896

3897 3898 3899 3900 3901
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3902

3903 3904 3905 3906 3907 3908
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3909
    STimeWindow     w = TSWINDOW_INITIALIZER;
3910
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3911

H
Haojun Liao 已提交
3912 3913
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3914
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3915
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3916

3917 3918
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3919
        assert(win.ekey == pQuery->window.ekey);
3920
      }
3921

3922
      pWindowResInfo->prevSKey = w.skey;
3923
    }
3924

3925
    pTableQueryInfo->queryRangeSet = 1;
3926
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3927 3928 3929 3930
  }
}

/*
 * Check whether any output expression of the query uses a function flagged with
 * TSDB_FUNCSTATE_NEED_TS in the aAggs function table.
 *
 * @return true if at least one such function is present, false otherwise
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) {
      return true;
    }

    ++idx;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * The column is required when
 * 1. the last key of the current table or the end key of the query window falls inside
 *    the time range of the block, so that the precise position must be located, or
 * 2. one of the output functions needs timestamps (see requireTimestamp).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;

  TSKEY lastKey = pQuery->current->lastKey;
  if (lastKey >= pBlockWin->skey && lastKey <= pBlockWin->ekey) {
    return true;
  }

  TSKEY endKey = pQuery->window.ekey;
  if (endKey >= pBlockWin->skey && endKey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3954
/*
 * Copy rows of the closed window results into the output buffer of the query (pQuery->sdata).
 *
 * Copying resumes at the window selected by pQInfo->groupIndex and at the row offset stored
 * in pQInfo->groupResInfo.pos.rowId. It stops when the output buffer holds
 * pQuery->rec.capacity rows or when all closed windows have been visited.
 *
 * @param pQInfo      query handle; groupIndex and groupResInfo.pos.rowId are advanced
 * @param pResultInfo window results to copy from
 * @param orderType   TSDB_ORDER_ASC walks the windows forward, any other value backward
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  int32_t first = 0;
  int32_t dir = -1;
  if (orderType != TSDB_ORDER_ASC) {  // descending: walk from the last unread window backward
    first = numOfClosed - pQInfo->groupIndex - 1;
    dir = -1;
  } else {
    first = pQInfo->groupIndex;
    dir = 1;
  }

  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  int32_t        copied = 0;

  for (int32_t w = first; (w >= 0) && (w < numOfClosed); w += dir) {
    SWindowResult *pWin = &pWindows[w];

    // an empty window is consumed without producing output
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->pos.rowId = 0;
      continue;
    }

    int32_t numOfRowsToCopy = pWin->numOfRows - pGroupResInfo->pos.rowId;
    int32_t rowOffset = pGroupResInfo->pos.rowId;

    if (numOfRowsToCopy > pQuery->rec.capacity - copied) {
      // not enough room for the rest of this window: copy what fits and remember the position
      numOfRowsToCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->pos.rowId += numOfRowsToCopy;
    } else {
      // the window is drained completely, the next call starts with the following one
      pGroupResInfo->pos.rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pRuntimeEnv, col, pWin, page);
      memcpy(dst, src + rowOffset * width, width * numOfRowsToCopy);
    }

    copied += numOfRowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4031
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4032
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4033

4034
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4035
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4036

4037
  pQuery->rec.rows += numOfResult;
4038

4039
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4040 4041
}

H
Haojun Liao 已提交
4042
/*
 * For non-interval queries, raise numOfRows of every window result to the largest numOfRes
 * reported by its output functions. TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ
 * columns are not taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = &pWindowResInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;
      bool    skipped = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

      if (!skipped) {
        pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4064
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4065
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4066
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4067
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4068

4069
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4070
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4071

H
Haojun Liao 已提交
4072
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4073
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4074
  } else {
4075
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4076 4077 4078
  }
}

H
Haojun Liao 已提交
4079
/*
 * Tell whether a table query still has results that must be returned to the client.
 *
 * - false as soon as a positive LIMIT has been reached by pQuery->rec.total.
 * - fill query (fillType set and not a point interpolation query): true if the fill info
 *   holds remaining rows, or if the query is completed and filling up to the end of the
 *   query window would still produce rows.
 * - otherwise: true if the query is completed and a group-by-column or interval query
 *   still holds window results.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // rows not returned to the client yet have to be filled and delivered first
  int32_t pending = taosNumOfRemainRows(pFillInfo);
  if (pending > 0) {
    return true;
  }

  /*
   * No pending rows at this point. While the query is not completed, the gap between two
   * result blocks is handled after the next data block has been retrieved, so nothing
   * remains for now.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  // query completed: check whether filling up to the end of the query window yields rows
  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the current result set into the response buffer and update the query status.
 *
 * Layout written to data:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each;
 *   2. the number of entries of pQInfo->arrTableIdInfo as int32 in network byte order;
 *   3. one STableIdInfo per entry with uid/tid/key converted to big endian.
 *
 * The write cursor follows the column data and may sit at any byte offset, so the table
 * count and the table id records are assembled in local variables and copied with memcpy
 * instead of being stored through a casted pointer (which would be a misaligned store).
 *
 * Afterwards, if the query is flagged QUERY_COMPLETED, it is switched to QUERY_OVER when
 * the super table query is over, or when the table query has no remaining results.
 *
 * @param pQInfo    query handle
 * @param numOfRows number of rows to copy for each output column
 * @param data      destination buffer; the caller must provide enough room
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(int32_t));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    // build the record locally; bytes not covered by the three fields are sent as zero
    STableIdInfo dst;
    memset(&dst, 0, sizeof(STableIdInfo));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4156
/*
 * Generate filled result rows into pDst and apply the OFFSET clause to them.
 *
 * Rows are generated repeatedly with a capacity of pQuery->rec.capacity:
 *   - offset == 0: the generated rows are returned as they are;
 *   - offset <  generated rows: the first `offset` rows are dropped, the rest is moved to
 *     the beginning of every column buffer, offset becomes 0 and the rest is returned;
 *   - otherwise: the whole batch is discarded, offset is reduced by its size, and the loop
 *     continues as long as queryHasRemainResForTableQuery() reports more results.
 *
 * The rows are generated into the same buffers (pDst) that the offset compaction works
 * on. Previously they were generated into pQuery->sdata, which only matched when the
 * caller passed pQuery->sdata as pDst.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param pDst        one output page per output column
 * @param numOfFilled not written by this function
 * @return number of rows available in pDst
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  while (1) {
    int32_t ret = (int32_t)taosGenerateDataBlock(pFillInfo, pDst, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= (int32_t)pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    } else {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
          pQuery->limit.offset - ret);

      // the whole batch is skipped by the offset
      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;
      ret = 0;
    }

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return ret;
    }
  }
}

4201
/*
 * Add the first stage merge time to the elapsed time of the cost summary and write the
 * summary of the query to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // the elapsed time reported below includes the merge time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: internal size:%"PRId64"B, numOfWin:%"PRId64, pQInfo, pCost->internalSupSize,
         pCost->numOfTimeWindows);
}

4217 4218
/*
 * Consume the remaining OFFSET inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped: lastKey
 * of the current table is moved one step past the block and the offset is cleared.
 * Otherwise pQuery->pos is set to the first row behind the skipped ones (in scan direction),
 * lastKey is set to the timestamp of that row, the offset is cleared and the output
 * functions are applied on the block.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (pQuery->limit.offset == pBlockInfo->rows) {  // current block will ignore completed
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pTableQueryInfo->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pTableQueryInfo->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  // position of the first row that is not skipped
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // the first column holds the timestamps; continue from the timestamp at the new position
  TSKEY *tsList = (TSKEY *) pTsCol->pData;
  pTableQueryInfo->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4252

4253 4254 4255 4256 4257
/*
 * Skip the rows covered by the OFFSET clause by walking over whole data blocks.
 *
 * Nothing is done when there is no positive offset or when filter columns are present.
 * Blocks with fewer rows than the remaining offset are skipped entirely; the first other
 * block is handed to updateOffsetVal(), which consumes the rest of the offset.
 *
 * Leaves via longjmp(pRuntimeEnv->env, ...) when the query has been killed or when terrno
 * reports an error after the scan.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    if (pQuery->limit.offset <= blockInfo.rows) {
      // the start position is located in (or right at the end of) the current block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is covered by the offset: continue one step behind it
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY blockEdge = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pTableQueryInfo->lastKey = blockEdge + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4292

H
Haojun Liao 已提交
4293
/*
 * Skip the time windows covered by the OFFSET clause of an interval query.
 *
 * *start is first set to lastKey of the current table. Nothing is skipped when there is no
 * positive offset, or when filter columns, a ts comp buffer or fill info are present.
 *
 * Otherwise the data blocks are walked: every time window that ends inside the current
 * block (in scan direction) consumes one unit of the offset. As soon as the offset reaches 0:
 *   - if the next window starts inside the current block, the block is loaded, the start
 *     position of that window is located, the query start (table window, query window and
 *     *start) is moved to the timestamp at that position and the functions are applied on
 *     the block;
 *   - otherwise the query start is simply moved to the start of the next window.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) if terrno reports an error after the scan.
 *
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * For an interval query without interpolation, one time window is skipped at a time until
   * pQuery->limit.offset windows are skipped. Since holes may exist in the data,
   * interval * offset cannot be used to compute the new start position directly.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  const bool       asc = QUERY_IS_ASC_QUERY(pQuery);
  SWindowResInfo  *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!asc) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pWindowResInfo->startTime = pQuery->window.skey;
      pWindowResInfo->prevSKey = aligned.skey;
    } else if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);

      pWindowResInfo->startTime = aligned.skey;
      pWindowResInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // a window that ends inside the current block is skipped as a whole
      bool endsInBlock = asc ? (win.ekey <= blockInfo.window.ekey) : (win.ekey >= blockInfo.window.skey);
      if (endsInBlock) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      GET_NEXT_TIMEWINDOW(pQuery, &tw);

      bool nextInBlock = asc ? (tw.skey <= blockInfo.window.ekey) : (tw.ekey >= blockInfo.window.skey);

      if (!nextInBlock) {
        if (pQuery->limit.offset == 0) {
          // all windows are skipped and the next one lies outside of this block: start there
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pWindowResInfo->prevSKey = tw.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still touches the current data block: load the block and find
       * the start position of the next queried time window in the primary timestamp column.
       * TODO optimize performance: only the primary timestamp column is required here.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

      tw = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (pQuery->limit.offset != 0) {
        // more windows to skip: continue with the window found in this block
        pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
        pWindowResInfo->prevSKey = tw.skey;
        win = tw;
        continue;
      }

      // all windows are skipped: reset the query start timestamp
      pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
      pQuery->window.skey = pTableQueryInfo->win.skey;
      *start = pTableQueryInfo->win.skey;

      pWindowResInfo->prevSKey = tw.skey;
      int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
      pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

      return true;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4419 4420
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4421
/*
 * Create the tsdb query handle (pRuntimeEnv->pQueryHandle) for the query.
 *
 * No handle is created, and TSDB_CODE_SUCCESS is returned, for tag-only queries and for
 * super table queries that are neither interval queries nor fixed output queries.
 *
 * The query condition uses the time window of the query, except for an ascending query on
 * a single table that is not an interval, group-by-column or fixed output query: it uses
 * the window of that table.
 *
 * For a first/last row query the query window is afterwards replaced by the window of the
 * condition, and the window and lastKey of every table are reset accordingly.
 *
 * @return terrno as left behind by the tsdb call (reset to TSDB_CODE_SUCCESS before it)
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  bool useTableWindow = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv));

  if (useTableWindow) {
    SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    } else {
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    }

    return terrno;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the new query window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray *group = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(group);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(group, t);

      pTableInfo->win = pQuery->window;
      pTableInfo->lastKey = pTableInfo->win.skey;
    }
  }

  return terrno;
}

4485 4486 4487
/*
 * Build the column descriptors used for filling, one per output expression of the query.
 *
 * Each descriptor takes bytes, type and function id from the expression and the fill value
 * from pQuery->fillVal; its offset is the sum of the bytes of all preceding columns.
 *
 * @return array of pQuery->numOfOutput entries allocated with calloc (to be released by the
 *         caller), or NULL if the allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t rowOffset = 0;
  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo    *pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo *pCol = &pFillCol[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    rowOffset += pExpr->bytes;
  }

  return pFillCol;
}

4510
/*
 * Initialize the runtime environment of a query before execution.
 *
 * Steps, in order:
 *   1. cache query properties (top/bottom query, tag output) and apply the scan limitation;
 *   2. create the tsdb query handle (setupQueryHandle);
 *   3. fill in the runtime environment fields and set the traverse order of the ts buffer;
 *   4. create the function contexts (setupQueryRuntimeEnv);
 *   5. create the disk based result buffer and the window result info where required:
 *      - super table query that is not tag-only: always a result buffer; window result info
 *        only for non-interval queries;
 *      - otherwise, for group-by-column or interval queries: both;
 *   6. create the fill info for fill queries that are not point interpolation queries;
 *   7. set the query status to QUERY_NOT_COMPLETED.
 *
 * @param pQInfo        query handle to initialize
 * @param pTsBuf        timestamp comparison buffer, may be NULL
 * @param tsdb          tsdb repository handle
 * @param vgId          vnode id stored in pQInfo
 * @param isSTableQuery true for a super table query
 * @return TSDB_CODE_SUCCESS, or the error code of the first failing step. Resources
 *         created by earlier steps are not released here.
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    if (pColInfo == NULL) {
      // taosCreateFillColInfo returns NULL only when its allocation fails; do not hand a
      // NULL column list to taosInitFillInfo
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4616
/*
 * Clear the "complete" flag on the result info of every output function
 * context, so the functions are allowed to run again (callers invoke this
 * before moving on to the next table/group).
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pInfo == NULL) {
      continue;  // this context has no result info attached
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643
/*
 * Prepare the execution environment before a data block of the given table is processed.
 *
 * - non-interval query: set the execution context for the table's group, passing the
 *   block end key advanced by one scan step.
 * - interval query: set the interval query range from the block start key and, when
 *   tag results or a ts buffer are involved, set the additional per-table info.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + direction);
    return;
  }

  // interval query
  setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

  bool needAdditionalInfo = (pEnv->hasTagResults || pEnv->pTSBuf != NULL);
  if (needAdditionalInfo) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
}

H
Haojun Liao 已提交
4644
/*
 * Iterate over all data blocks delivered by the active query handle (the primary handle
 * during the master scan, the secondary handle otherwise) and apply the query functions
 * on every block that is not discarded.
 *
 * The function leaves via longjmp(pRuntimeEnv->env, ...) when the query is killed or when
 * terrno is set after the scan loop.
 *
 * @param pQInfo query handle
 * @return difference of taosGetTimestampMs() taken at exit and at entry
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pHandle = NULL;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  } else {
    pHandle = pRuntimeEnv->pSecQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // locate the query info of the table that owns this block by its tid
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    STableQueryInfo *pTableInfo = *ppTableInfo;
    pQuery->current = pTableInfo;

    // the table window must be ordered along the scan direction and lie within the query window
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert((pTableInfo->win.skey <= pTableInfo->win.ekey) &&
             (pTableInfo->lastKey >= pTableInfo->win.skey) &&
             (pTableInfo->win.skey >= pQuery->window.skey && pTableInfo->win.ekey <= pQuery->window.ekey));
    } else {
      assert((pTableInfo->win.skey >= pTableInfo->win.ekey) &&
             (pTableInfo->lastKey <= pTableInfo->win.skey) &&
             (pTableInfo->win.skey <= pQuery->window.skey && pTableInfo->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, pTableInfo, &blockInfo);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pBlockStatis = NULL;
    SArray      *pBlockData = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pHandle, &blockInfo,
                                         &pBlockStatis, &pBlockData, &blockStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // block is skipped entirely: move lastKey one step past the block edge in scan direction
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }

      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pBlockStatis, pBlockData, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  // an error recorded in terrno aborts the query
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4719 4720
/*
 * Set up the runtime environment to query a single table of the first table group:
 * reset the query status, set tag values if required, (re)create the tsdb query handle
 * restricted to that table, position the ts buffer cursor and initialize the output buffers.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) if the query handle cannot be created.
 *
 * @param pQInfo query handle
 * @param index  position of the table inside table group 0
 * @return false if a ts buffer exists and no element is found for the current tag
 *         value; true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *pTableGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(pTableGroup, index);

  bool requireTag = (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL);
  if (requireTag) {
    setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId *pId = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pId->uid, pId->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  // the scan resumes from the last visited key of this table
  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor: build a temporary group list that contains only the current table
  SArray *pGroupList = taosArrayInit(1, POINTER_BYTES);
  SArray *pTableList = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo keyInfo = {.pTable = pTableInfo->pTable, .lastKey = pTableInfo->lastKey};
  taosArrayPush(pTableList, &keyInfo);
  taosArrayPush(pGroupList, &pTableList);

  STableGroupInfo groupInfo = {.numOfTables = 1, .pGroupList = pGroupList};

  // release the handle used for the previous table
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &groupInfo, pQInfo);

  // the temporary lists are released before the handle is checked
  taosArrayDestroy(pTableList);
  taosArrayDestroy(pGroupList);

  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pRuntimeEnv->pCtx[0].tag);
      if (elem.vnode < 0) {
        // failed to find data with the specified tag value
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4789
static void sequentialTableProcess(SQInfo *pQInfo) {
4790
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4791
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4792
  setQueryStatus(pQuery, QUERY_COMPLETED);
4793

H
Haojun Liao 已提交
4794
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4795

H
Haojun Liao 已提交
4796
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4797 4798
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4799

4800
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4801
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4802

S
TD-1057  
Shengliang Guan 已提交
4803
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4804
             numOfGroups, group);
H
Haojun Liao 已提交
4805 4806 4807 4808 4809 4810 4811

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4812 4813
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4814 4815 4816
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4817

H
Haojun Liao 已提交
4818 4819 4820 4821 4822 4823 4824
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4825

4826
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4827
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4828
      } else {
H
Haojun Liao 已提交
4829
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4830
      }
B
Bomin Zhang 已提交
4831 4832 4833 4834 4835 4836

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4837

H
Haojun Liao 已提交
4838
      initCtxOutputBuf(pRuntimeEnv);
4839

4840
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4841
      assert(taosArrayGetSize(s) >= 1);
4842

4843
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4844 4845 4846
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4847

dengyihao's avatar
dengyihao 已提交
4848
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4849

H
Haojun Liao 已提交
4850
      // here we simply set the first table as current table
4851 4852 4853
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4854
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4855

H
Haojun Liao 已提交
4856 4857 4858 4859 4860
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4861

H
Haojun Liao 已提交
4862 4863 4864 4865 4866
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4867 4868 4869 4870 4871 4872

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4873
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4874
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4875
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4876

S
TD-1057  
Shengliang Guan 已提交
4877
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4878 4879 4880 4881 4882 4883 4884

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4885 4886
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4899
      // no need to update the lastkey for each table
4900
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4901

B
Bomin Zhang 已提交
4902 4903
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4904 4905 4906
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4907

4908
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4909 4910
      assert(taosArrayGetSize(s) >= 1);

4911
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4912 4913 4914 4915 4916 4917 4918 4919

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
4920
      taosArrayDestroy(s);
4921 4922 4923 4924 4925
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
4926
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
4927 4928 4929

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
4930
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes));
4931 4932 4933
        }
      }

4934
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4935 4936 4937 4938 4939 4940 4941
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4942
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4943 4944 4945 4946 4947 4948

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4949 4950 4951
    }
  } else {
    /*
4952
     * 1. super table projection query, 2. ts-comp query
4953 4954 4955
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4956
    if (pQInfo->groupIndex > 0) {
4957
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4958
      pQuery->rec.total += pQuery->rec.rows;
4959

4960
      if (pQuery->rec.rows > 0) {
4961 4962 4963
        return;
      }
    }
4964

4965
    // all data have returned already
4966
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4967 4968
      return;
    }
4969

4970 4971
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4972

H
Haojun Liao 已提交
4973
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4974 4975
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4976

4977
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4978
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4979
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4980
      }
4981

4982
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4983
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4984
        pQInfo->tableIndex++;
4985 4986
        continue;
      }
4987

H
hjxilinx 已提交
4988
      // TODO handle the limit offset problem
4989
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4990 4991
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4992 4993 4994
          continue;
        }
      }
4995

4996
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4997
      skipResults(pRuntimeEnv);
4998

4999
      // the limitation of output result is reached, set the query completed
5000
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5001
        SET_STABLE_QUERY_OVER(pQInfo);
5002 5003
        break;
      }
5004

5005 5006
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5007

5008
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5009 5010 5011 5012 5013 5014
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5015
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5016

H
Haojun Liao 已提交
5017
        STableIdInfo tidInfo = {0};
5018

H
Haojun Liao 已提交
5019 5020 5021
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5022
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5023 5024
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5025
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5026
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5027 5028
          break;
        }
5029

5030
      } else {
5031
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5032 5033
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5034 5035
          continue;
        } else {
5036 5037 5038
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5039 5040 5041
        }
      }
    }
H
Haojun Liao 已提交
5042

5043
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5044 5045
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5046
  }
5047

5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5060
    finalizeQueryResult(pRuntimeEnv);
5061
  }
5062

5063 5064 5065
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5066

5067
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5068 5069
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5070
      pQuery->limit.offset);
5071 5072
}

5073 5074 5075 5076
/*
 * Switch the runtime environment to the reverse scan: set the reverse scan flag, swap the
 * query time window boundaries, switch the scan order, and create the secondary query handle
 * over all table groups with the switched order and window.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) if the secondary handle cannot be created.
 *
 * @param pQInfo query handle
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // keep the ts buffer cursor order in line with the switched query order
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer right away so that no stale handle is
  // reachable from the runtime env while the reverse scan is being prepared
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5110 5111 5112 5113
/*
 * Switch the runtime environment back after the reverse scan: swap the query time window
 * boundaries again, switch the ts buffer cursor order, switch the function context order
 * and set the master scan flag.
 *
 * NOTE(review): pQuery->order.order, which doSaveContext() switches, is not switched back
 * here (unless switchCtxOrder() does so) - confirm this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  // put skey/ekey back into their positions
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  STSBuf *pTSBuf = pEnv->pTSBuf;
  if (pTSBuf != NULL) {
    SWITCH_ORDER(pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5124 5125 5126
/*
 * Close all window results once a scan is finished.
 *
 * For an interval query the window results kept by every table of every table group are
 * closed; otherwise only the window results of the runtime environment are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t totalGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < totalGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pTables);

    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables: one forward scan over all data blocks, an optional
 * reverse scan, and finally copy of the results into the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is performed; only the remaining results are copied
 * to the output buffer.
 *
 * If pQInfo->code reports an error or the query is killed after a scan, the results are
 * finalized and the function leaves via longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED).
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  /*
   * if the groupIndex > 0, the query process must have been completed already, we only
   * need to copy the data into output buffer
   */
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // forward scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // check again, the reverse scan may have failed or the query may have been killed meanwhile
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  bool needMerge = (QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery));
  if (!needMerge) {
    // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5223
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5224
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5225

H
hjxilinx 已提交
5226
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5227
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5228 5229
    return;
  }
5230

H
hjxilinx 已提交
5231
  pQuery->current = pTableInfo;  // set current query table info
5232

5233
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5234
  finalizeQueryResult(pRuntimeEnv);
5235

H
Haojun Liao 已提交
5236
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5237 5238
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5239
  }
5240

H
Haojun Liao 已提交
5241
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5242
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5243

5244
  skipResults(pRuntimeEnv);
5245
  limitResults(pRuntimeEnv);
5246 5247
}

H
hjxilinx 已提交
5248
/*
 * Handler for queries on one table that may produce results over several invocations.
 *
 * The table is scanned repeatedly until either some rows are generated or the query is
 * completed; the limit is then applied. When the query on the table is completed, the
 * table id together with its last visited key is appended to pQInfo->arrTableIdInfo.
 *
 * @param pQInfo     query handle
 * @param pTableInfo query info of the table, becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    // offset still positive after skipping with no filter present: query is completed
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->rec.rows == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->rec.rows > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // zero-initialize: the whole struct (including any padding or member that is not set
    // below) is copied into the array by taosArrayPush
    STableIdInfo tidInfo = {0};
    STableId* id = TSDB_TABLEID(pQuery->current->pTable);

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5308
/*
 * Scan the data blocks of the current table for an interval query, repeating the scan until
 * the query status is either QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * After every scan the results are finalized. If an offset is pending, no fill is requested
 * and either filter columns or a ts buffer exist, closed time windows are dropped from the
 * front of the window list to consume the offset.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       key passed to scanOneTableDataBlocks on every iteration
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool hasFilterOrTsBuf = (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (hasFilterOrTsBuf && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // never drop more windows than the remaining offset
      int32_t numToDrop = (int32_t)(MIN(closedWindows, pQuery->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, numToDrop);
      pQuery->limit.offset -= numToDrop;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5335
// handle time interval query on table
H
hjxilinx 已提交
5336
/*
 * Run an interval (or group-by-normal-column) query against a single table.
 *
 * The table becomes pQuery->current. Each round scans the table, copies the
 * window results into the output buffers, and -- when a fill type is set and
 * rows were produced -- feeds them through the fill step. The loop ends once
 * rows are available, the query is completed, or no fill is needed.
 */
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  pQuery->current = pTableInfo;

  // skip blocks without load the actual data block from file if no filter condition present
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  skipTimeInterval(pRuntimeEnv, &newStartKey);

  // offset still pending with neither filters nor fill: finish immediately
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      pQInfo->groupIndex = 0;  // always start from 0
      pQuery->rec.rows = 0;
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);

      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
    }

    // the offset is handled at prepare stage if no interpolation involved
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
      limitResults(pRuntimeEnv);
      break;
    }

    // hand the freshly produced rows to the fill step
    taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
    taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);

    int32_t numOfFilled = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      limitResults(pRuntimeEnv);
      break;
    }

    // no result generated yet, continue retrieve data
    pQuery->rec.rows = 0;
  }

  // all data scanned, the group by normal column can return
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
    pQInfo->groupIndex = 0;
    pQuery->rec.rows = 0;
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
  }
}

5392 5393 5394 5395
/*
 * Execute one round of a single-table query.
 *
 * Results left over from a previous round are returned first: through the
 * fill step when a fill type is set, otherwise from the buffered window
 * results. Only when nothing is pending is the table scanned again, using the
 * processor that matches the query shape. The elapsed time of that scan is
 * added to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResForTableQuery(pRuntimeEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * Rows are still pending because of result interpolation, so keep
       * draining the fill step instead of launching a new retrieve round.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pRuntimeEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pRuntimeEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // there are not data remains
        if (pRuntimeEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t st = taosGetTimestampUs();

  // a table query always operates on exactly one table
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray*          g = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* item = taosArrayGetP(g, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  bool useIntervalProcessor = QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol;
  if (useIntervalProcessor) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, item);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, item);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, item);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - st);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5457
/*
 * Execute one round of a super-table query.
 *
 * Interval queries, and fixed-output queries that are neither point
 * interpolation nor grouped by a normal column, go through
 * multiTableQueryProcess(); everything else is processed table by table via
 * sequentialTableProcess(). The elapsed time is added to the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  pQuery->rec.rows = 0;

  int64_t st = taosGetTimestampUs();

  bool processAllTablesTogether =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) && (!pRuntimeEnv->groupbyNormalCol));

  if (processAllTablesTogether) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - st);
}

5478
/*
 * Locate the column referenced by an expression.
 *
 * Tag columns are searched in pTagCols (the table-name pseudo column yields
 * TSDB_TBNAME_COLUMN_INDEX), user-defined columns yield TSDB_UD_COLUMN_INDEX,
 * and all other columns are searched in pQueryMsg->colList.
 *
 * @return the position of the matching column, or one of the special index
 *         constants above. A column that cannot be found trips an assert and
 *         returns -1 when asserts are compiled out.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t t = 0; t < pQueryMsg->numOfTags; ++t) {
      if (pExprMsg->colInfo.colId == pTagCols[t].colId) {
        return t;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t c = 0; c < pQueryMsg->numOfCols; ++c) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[c].colId) {
        return c;
      }
    }
  }

  // no matching column found
  assert(0);
  return -1;
}

5509 5510 5511
/*
 * Check that the column index resolved for an expression is below the number
 * of columns or below the number of tags carried by the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5514
/*
 * Sanity-check the scalar fields of a converted query message.
 *
 * The checks run in a fixed order and only the first failing one is logged.
 *
 * @return true when the interval, table count, group-by column count and
 *         output column count are all within their accepted ranges
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the number of source columns and tags referenced by a query.
 *
 * Both counts come from the received message, so each one is range-checked on
 * its own before they are added; summing first could overflow a signed
 * integer when one of the counts is close to the type's maximum.
 *
 * A query that references neither columns nor tags is only accepted when
 * every output expression is a tag projection, or a TID_TAG / COUNT on the
 * table-name pseudo column.
 *
 * @param pQueryMsg  converted query message
 * @param pExprMsg   array of pQueryMsg->numOfOutput expression pointers
 * @return true when the source column description is acceptable
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  // short-circuit evaluation guarantees the sum is only formed from two values in [0, TSDB_MAX_COLUMNS]
  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 ||
      pQueryMsg->numOfCols > TSDB_MAX_COLUMNS || pQueryMsg->numOfTags > TSDB_MAX_COLUMNS ||
      pQueryMsg->numOfCols + pQueryMsg->numOfTags > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;
  if (numOfTotal == 0) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      SSqlFuncMsg* pFuncMsg = pExprMsg[i];

      if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
          (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
          (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
        continue;
      }

      // any other expression needs at least one source column or tag
      return false;
    }
  }

  return true;
}

5560
/*
 * Build the table id list from the STableIdInfo records stored at pMsg.
 *
 * Each record is byte-swapped in place inside the message and then appended
 * to a newly created array stored in *pTableIdList.
 *
 * @return the position in the message right after the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pRecord = (STableIdInfo *)pMsg;
  for (int32_t j = 0; j < pQueryMsg->numOfTables; ++j, ++pRecord) {
    pRecord->tid = htonl(pRecord->tid);
    pRecord->uid = htobe64(pRecord->uid);
    pRecord->key = htobe64(pRecord->key);

    taosArrayPush(*pTableIdList, pRecord);
  }

  return (char *)pRecord;
}
5578

5579
/**
H
hjxilinx 已提交
5580
 * pQueryMsg->head has been converted before this function is called.
5581
 *
H
hjxilinx 已提交
5582
 * @param pQueryMsg
5583 5584 5585 5586
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5587
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5588
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5589 5590
  int32_t code = TSDB_CODE_SUCCESS;

5591 5592 5593 5594
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5595 5596 5597 5598 5599 5600
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5601 5602
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5603

5604 5605
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5606
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5607
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5608 5609

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5610
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5611
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5612 5613 5614
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5615
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5616
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5617
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5618

5619
  // query msg safety check
5620
  if (!validateQueryMsg(pQueryMsg)) {
5621 5622
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5623 5624
  }

H
hjxilinx 已提交
5625 5626
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5627 5628
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5629
    pColInfo->colId = htons(pColInfo->colId);
5630
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5631 5632
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5633

H
hjxilinx 已提交
5634
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5635

H
hjxilinx 已提交
5636
    int32_t numOfFilters = pColInfo->numOfFilters;
5637
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5638
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5639 5640 5641 5642
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5643 5644 5645
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5646
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5647

5648 5649
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5650 5651 5652

      pMsg += sizeof(SColumnFilterInfo);

5653 5654
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5655

5656
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5657 5658 5659 5660 5661
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5662
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5663
        pMsg += (pColFilter->len + 1);
5664
      } else {
5665 5666
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5667 5668
      }

5669 5670
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5671 5672 5673
    }
  }

5674
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5675 5676 5677 5678 5679
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5680
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5681

5682
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5683
    (*pExpr)[i] = pExprMsg;
5684

5685
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5686 5687 5688 5689
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5690

5691
    pMsg += sizeof(SSqlFuncMsg);
5692 5693

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5694
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5695 5696 5697 5698
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5699
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5700 5701 5702 5703 5704
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5705 5706
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5707
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5708 5709
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5710 5711
      }
    } else {
5712
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5713
//        return TSDB_CODE_QRY_INVALID_MSG;
5714
//      }
5715 5716
    }

5717
    pExprMsg = (SSqlFuncMsg *)pMsg;
5718
  }
5719

5720
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5721
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5722
    goto _cleanup;
5723
  }
5724

H
hjxilinx 已提交
5725
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5726

H
hjxilinx 已提交
5727
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5728
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5729 5730 5731 5732
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5733 5734 5735

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5736
      pMsg += sizeof((*groupbyCols)[i].colId);
5737 5738

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5739 5740
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5741
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5742 5743 5744 5745 5746
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5747

H
hjxilinx 已提交
5748 5749
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5750 5751
  }

5752 5753
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5754
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5755 5756

    int64_t *v = (int64_t *)pMsg;
5757
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5758 5759
      v[i] = htobe64(v[i]);
    }
5760

5761
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5762
  }
5763

5764 5765
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5766 5767 5768 5769 5770
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5771 5772
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5773

5774 5775 5776 5777
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5778

5779
      (*tagCols)[i] = *pTagCol;
5780
      pMsg += sizeof(SColumnInfo);
5781
    }
H
hjxilinx 已提交
5782
  }
5783

5784 5785 5786
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5787 5788 5789 5790 5791 5792

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5793 5794 5795
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5796

weixin_48148422's avatar
weixin_48148422 已提交
5797
  if (*pMsg != 0) {
5798
    size_t len = strlen(pMsg) + 1;
5799

5800
    *tbnameCond = malloc(len);
5801 5802 5803 5804 5805
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5806
    strcpy(*tbnameCond, pMsg);
5807
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5808
  }
5809

5810
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5811 5812
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5813
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5814
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5815 5816

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5817 5818

_cleanup:
S
Shengliang Guan 已提交
5819
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5820 5821
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5822 5823 5824 5825
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5826 5827

  return code;
5828 5829
}

H
hjxilinx 已提交
5830
/*
 * Build the expression tree of an arithmetic expression from the binary
 * string carried in its first argument and store it in pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS on success, the code caught from
 *         exprTreeFromBinary() when it raises, or TSDB_CODE_QRY_APP_ERROR
 *         when no tree was produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pExprNode = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pExprNode = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pExprNode != NULL) {
    pArithExprInfo->pExpr = pExprNode;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5851
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5852 5853
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5854
  int32_t code = TSDB_CODE_SUCCESS;
5855

H
Haojun Liao 已提交
5856
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5857
  if (pExprs == NULL) {
5858
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5859 5860 5861 5862 5863
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5864
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5865
    pExprs[i].base = *pExprMsg[i];
5866
    pExprs[i].bytes = 0;
5867 5868 5869 5870

    int16_t type = 0;
    int16_t bytes = 0;

5871
    // parse the arithmetic expression
5872
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5873
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5874

5875
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5876
        taosTFree(pExprs);
5877
        return code;
5878 5879
      }

5880
      type  = TSDB_DATA_TYPE_DOUBLE;
5881
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5882
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5883
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5884
      type = s.type;
H
Haojun Liao 已提交
5885
      bytes = s.bytes;
5886 5887
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5888 5889
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5890 5891
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5892 5893 5894 5895 5896

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5897
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5898
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5899

dengyihao's avatar
dengyihao 已提交
5900
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5901 5902 5903 5904
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5905
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5906

H
Haojun Liao 已提交
5907 5908 5909
        type  = s.type;
        bytes = s.bytes;
      }
5910 5911
    }

S
TD-1057  
Shengliang Guan 已提交
5912
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
5913
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5914
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5915
      taosTFree(pExprs);
5916
      return TSDB_CODE_QRY_INVALID_MSG;
5917 5918
    }

5919
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5920
      tagLen += pExprs[i].bytes;
5921
    }
5922
    assert(isValidDataType(pExprs[i].type));
5923 5924 5925
  }

  // TODO refactor
5926
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5927 5928
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5929

5930
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5931
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5932 5933 5934 5935 5936 5937 5938 5939 5940
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
5941 5942 5943
    }
  }

5944
  *pExprInfo = pExprs;
5945 5946 5947
  return TSDB_CODE_SUCCESS;
}

5948
/*
 * Create the group-by descriptor of a query from the converted message.
 *
 * @param pQueryMsg  converted query message
 * @param pColIndex  array of pQueryMsg->numOfGroupCols group-by column indexes
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 *                   fails; left untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by columns
 *         or when an allocation failed
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; release the half-built descriptor instead
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5972
/*
 * Build pQuery->pFilterInfo from the filters attached to the query columns.
 *
 * Every column with at least one filter gets an entry holding a copy of its
 * column info and one SColumnFilterElem per filter. Each element is bound to
 * a filter function: a range function when both a lower (> or >=) and an
 * upper (< or <=) bound are given, otherwise the value function selected by
 * the single relation that is set.
 *
 * @param pQInfo  owning query info, used for logging only
 * @param pQuery  query whose colList is inspected; numOfFilterCols and
 *                pFilterInfo are updated
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or
 *         TSDB_CODE_QRY_INVALID_MSG
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  // i walks all columns, j walks the filtered-column entries
  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // only one of the two tables may exist for this type; reject instead of dereferencing NULL
        if (rangeFilterArray == NULL) {
          qError("QInfo:%p failed to get range filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        if (filterArray == NULL) {
          qError("QInfo:%p failed to get value filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6065
/**
 * For every output expression, look up the column it reads and record that column's
 * position in colInfo.colIndex.
 *
 * - Arithmetic expressions (TSDB_FUNC_ARITHM) are skipped.
 * - Normal columns are searched in pQuery->colList; a miss is a programming error (assert).
 * - Columns whose id is <= TSDB_UD_COLUMN_INDEX are user-defined constants and are left alone.
 * - Everything else is searched in pQuery->tagColList; a miss is tolerated only for the
 *   table-name pseudo column (TSDB_TBNAME_COLUMN_INDEX).
 *
 * @param pQuery  query descriptor whose pSelectExpr, colList and tagColList are already set
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself first, then the member reached through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6102 6103
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6104 6105 6106
/**
 * Decide how many rows the output buffers hold (pQuery->rec.capacity) and set
 * pQuery->rec.threshold to 85% of that capacity.
 *
 * Projection queries get as many rows as fit in 1.5 MB of row data, but never fewer
 * than 8192 rows; all other queries use a fixed 4096 rows.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE    = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS    = 8192;
  const float   RESULT_THRESHOLD_RATIO = 0.85f;

  if (!isProjQuery(pQuery)) {
    // in case of non-prj query, a smaller output buffer will be used.
    pQuery->rec.capacity  = 4096;
    pQuery->rec.threshold = (int32_t)(pQuery->rec.capacity * RESULT_THRESHOLD_RATIO);
    return;
  }

  int32_t rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
  if (rows < RESULT_MSG_MIN_ROWS) {
    rows = RESULT_MSG_MIN_ROWS;
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

6123 6124
/**
 * Allocate and populate the SQInfo / SQuery pair that represents one query.
 *
 * Ownership: pGroupbyExpr, pExprs, pTagCols and the contents of *pTableGroupInfo are taken
 * over by this function in every case. On success they belong to the returned SQInfo
 * (the table group is copied by value into pQInfo->tableGroupInfo). On failure they are
 * released here, either directly in the cleanup labels or through freeQInfo(), so the
 * caller must not free them again.
 *
 * @param pQueryMsg        decoded query message; scalar settings, column list, fill values
 *                         and the time window are copied from it
 * @param pGroupbyExpr     group-by description, may be NULL
 * @param pExprs           output expressions, pQueryMsg->numOfOutput entries
 * @param pTableGroupInfo  tables to query, arranged in groups
 * @param pTagCols         tag column descriptions
 * @param stableQuery      forwarded to changeExecuteScanOrder()
 * @return the new SQInfo, or NULL if any allocation or initialisation step failed
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  // from here on pQuery is reachable from pQInfo, so freeQInfo() releases everything stored in it
  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // size the array by its own element type instead of an unrelated struct
  pQuery->colList = calloc(numOfCols, sizeof(pQuery->colList[0]));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the shallow copy above shares the filter array with the message; give the query its own copy
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    assert(pExprs[col].interBytes >= pExprs[col].bytes);

    // allocate additional memory for interResults that are usually larger then final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);

    // The loop below pushes into the group list once per group and into the map once per
    // table, so fail cleanly here rather than handing a NULL container to those calls.
    // The checks are limited to the cases in which the containers are actually written to.
    if (numOfGroups > 0 && pQInfo->tableqinfoGroupInfo.pGroupList == NULL) {
      goto _cleanup;
    }

    if (pTableGroupInfo->numOfTables > 0 && pQInfo->tableqinfoGroupInfo.map == NULL) {
      goto _cleanup;
    }
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);

  // one contiguous slab holding the STableQueryInfo of every table; slots are handed out below
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  int32_t index = 0;

  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      // next free slot in the slab allocated above
      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // each table starts from its own last key; the end of the window is shared
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

  // The three labels fall through into one another on purpose:
  //   _cleanup_qinfo : pQInfo was never allocated, so the caller's table group is destroyed directly
  //   _cleanup_query : pQuery was never attached, so the arguments it would have owned are freed directly
  //   _cleanup       : everything else is reachable from pQInfo and released by freeQInfo()
_cleanup_qinfo:
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  taosTFree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  taosTFree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6302
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6303 6304 6305 6306
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6307

H
hjxilinx 已提交
6308 6309 6310 6311
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6312
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6313 6314 6315
  return (sig == (uint64_t)pQInfo);
}

6316
/**
 * Second-stage initialisation of a query handle created by createQInfoImpl().
 *
 * The query is marked QUERY_COMPLETED, and TSDB_CODE_SUCCESS is returned, without any
 * further set-up when the time window is empty for the requested scan order or when no
 * table is left to query. Otherwise the optional timestamp buffer carried by the message
 * is materialised and doInitQInfo() is run.
 *
 * @param pQueryMsg  decoded query message; tsLen/tsOffset/tsNumOfBlocks/tsOrder describe the
 *                   optional timestamp block appended to it
 * @param tsdb       storage handle, used for the precision setting and passed to doInitQInfo()
 * @param vgId       passed through to doInitQInfo()
 * @param pQInfo     handle to initialise; it is freed here when doInitQInfo() fails
 * @param isSTable   passed through to doInitQInfo()
 * @return TSDB_CODE_SUCCESS, or the error code reported by doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // Build the timestamp buffer only once it is certain that it will be handed to
  // doInitQInfo(); creating it before the early returns above left it unreferenced.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6360
/**
 * Release an array of column filters.
 *
 * For every entry flagged as a string filter (filterstr) the buffer referenced by pz is
 * freed first; then the array itself is freed. Nothing happens when the array is NULL or
 * numOfFilters is 0.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter == NULL || numOfFilters == 0) {
    return;
  }

  int32_t idx = 0;
  while (idx < numOfFilters) {
    SColumnFilterInfo *pCur = &pFilter[idx];
    if (pCur->filterstr) {
      free((void*)(pCur->pz));
    }

    ++idx;
  }

  free(pFilter);
}

H
Haojun Liao 已提交
6374 6375
/**
 * Tear down the per-table query state kept in a table group structure.
 *
 * Every STableQueryInfo in every group is passed to destroyTableQueryInfoImpl(), each
 * group array is destroyed, then the group list and the hash map. The structure is left
 * empty (NULL list, NULL map, zero tables) so it can be torn down again safely.
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pMembers = taosArrayGetP(pGroups, g);
      size_t  memberCount = taosArrayGetSize(pMembers);

      for (int32_t m = 0; m < memberCount; ++m) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pMembers, m);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pMembers);
    }
  }

  taosArrayDestroy(pGroups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
hjxilinx 已提交
6398 6399 6400 6401
/**
 * Release a query handle and everything it owns.
 *
 * Handles that fail isValidQInfo() (NULL, or signature not matching) are ignored, which
 * also makes a second call on an already freed handle's stale copy a no-op as long as the
 * signature was cleared. The signature is reset to 0 right before the handle memory is
 * released.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    // per-output-column result buffers, then the pointer table that holds them
    if (pQuery->sdata != NULL) {
      int32_t col = 0;
      while (col < pQuery->numOfOutput) {
        taosTFree(pQuery->sdata[col]);
        ++col;
      }

      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // compiled filters of every filtered column; the pFilterInfo array itself goes further down
    for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
      if (pQuery->pFilterInfo[f].numOfFilters > 0) {
        taosTFree(pQuery->pFilterInfo[f].pFilters);
      }
    }

    // output expressions: destroy attached expression trees before the array
    if (pQuery->pSelectExpr != NULL) {
      for (int32_t e = 0; e < pQuery->numOfOutput; ++e) {
        SExprInfo *pOne = &pQuery->pSelectExpr[e];
        if (pOne->pExpr == NULL) {
          continue;
        }

        tExprTreeDestroy(&pOne->pExpr, NULL);
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    // column descriptions together with the filter copies they carry
    if (pQuery->colList != NULL) {
      for (int32_t c = 0; c < pQuery->numOfCols; c++) {
        SColumnInfo *pCol = &pQuery->colList[c];
        freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
      }

      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  taosTFree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle so that isValidQInfo() rejects it from now on
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6471
/**
 * Compute the number of payload bytes for the current result batch.
 *
 * Regular queries: rowSize * (*numOfRows).
 *
 * TS-comp queries with at least one row: the result lives in a file whose path is stored
 * in sdata[0]->data. The file size is returned and also written back to *numOfRows; 0 is
 * returned (and *numOfRows left untouched) when the file cannot be stat'ed.
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!(isTSCompQuery(pQuery) && (*numOfRows) > 0)) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
6492

H
hjxilinx 已提交
6493 6494 6495
/**
 * Copy the current result batch into the response payload and update the row accounting.
 *
 * TS-comp queries keep their result in a temporary file whose path is stored in
 * sdata[0]->data: the whole file is read into data, then the file is closed and unlinked.
 * All other queries go through doCopyQueryResultToMsg().
 *
 * Afterwards rec.total is advanced by rec.rows, and the query is flagged QUERY_OVER when
 * the LIMIT has been reached exactly (or, for TS-comp queries, when it was already
 * QUERY_COMPLETED).
 *
 * @param pQInfo  query handle
 * @param data    destination buffer; the caller sizes it (see getResultSize())
 * @return always TSDB_CODE_SUCCESS; I/O problems are logged only
 *         (todo: return the error code to client)
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the results signed: both lseek() and read() report failure as -1
      int64_t fileSize = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, fileSize);

      if (fileSize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to seek in tmp file of ts-comp data, path:%s, reason:%s", pQInfo,
               pQuery->sdata[0]->data, strerror(errno));
      } else {
        // read() may legitimately deliver fewer bytes than requested, so keep going until
        // the whole file has been copied or a real error / premature EOF shows up
        char   *dst = data;
        int64_t remain = fileSize;

        while (remain > 0) {
          int64_t nread = (int64_t)read(fd, dst, (size_t)remain);
          if (nread < 0) {
            if (errno == EINTR) {
              continue;
            }

            qError("QInfo:%p failed to read tmp file of ts-comp data, path:%s, reason:%s", pQInfo,
                   pQuery->sdata[0]->data, strerror(errno));
            break;
          }

          if (nread == 0) {  // file is shorter than the size just reported
            qError("QInfo:%p tmp file of ts-comp data truncated, path:%s, %"PRId64" bytes missing", pQInfo,
                   pQuery->sdata[0]->data, remain);
            break;
          }

          dst    += nread;
          remain -= nread;
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6545 6546 6547 6548 6549 6550 6551
// Bookkeeping for a set of query handles.
// NOTE(review): no code in this part of the file uses the struct; the per-field notes
// below are read off the field names and types only — confirm against the users.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the same vgId that qCreateQueryInfo() receives — confirm
  bool            closed;         // presumably set once the manager stops accepting handles — confirm
  pthread_mutex_t lock;           // presumably serialises access to the fields above — confirm
} SQueryMgmt;

6552
/**
 * Build a ready-to-run query handle from a query message.
 *
 * Steps: decode the message, build the output and group-by expressions, resolve the set of
 * tables to query (single table, super table by tag condition, or explicit id list),
 * create the handle with createQInfoImpl() and finish it with initQInfo().
 *
 * All intermediate objects decoded from the message are released before returning, on
 * success and on failure alike. Objects handed to createQInfoImpl() are owned by it from
 * that call on, whatever its outcome.
 *
 * @param tsdb       storage handle, must not be NULL
 * @param vgId       passed through to initQInfo()
 * @param pQueryMsg  query message, must not be NULL; it is decoded in place
 * @param pQInfo     out: the new handle on success, NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    // the query type comes straight from the message: reject it instead of aborting the process
    qError("qmsg:%p invalid query type:%d", pQueryMsg, (int32_t)pQueryMsg->queryType);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  // createQInfoImpl() takes over these three objects and the table group whether it succeeds
  // or not, so forget them here to keep the cleanup below from freeing them a second time
  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // pExprs is still set only when a step between createQFunctionExprFromMsg() and
  // createQInfoImpl() failed. Release the expression trees hanging off the entries as
  // well, the same way createQInfoImpl() does on its own failure path.
  // NOTE(review): assumes the array holds pQueryMsg->numOfOutput entries, as
  // createQInfoImpl() also assumes — confirm against createQFunctionExprFromMsg().
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      SExprInfo* pExprInfo = &pExprs[i];
      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // initQInfo() already freed the handle on failure, but *pQInfo would still hold the stale address
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6678
/**
 * Public destructor of a query handle: logs completion, prints the cost summary and
 * frees the handle. Invalid handles (see isValidQInfo()) are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

6689 6690 6691 6692 6693 6694 6695 6696
/**
 * Finish one execution round of a query handle.
 *
 * Under pQInfo->lock: marks the result as ready, samples whether a response context is
 * registered, and gives up ownership of the handle (owner = 0). After unlocking, the
 * `ready` semaphore is posted.
 *
 * @return true when a response context (rspContext) was registered at the time of the check
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // Ownership must be dropped inside the critical section: once the handle is queued as a
  // task again, another thread may start on it before this one gets any further.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6708
/**
 * Run one execution round of a query handle on the calling thread.
 *
 * The thread first claims the handle by swapping its id into `owner`; if another thread
 * already owns it, the error code TSDB_CODE_QRY_IN_EXEC is recorded and false is returned.
 * In every other case the round ends through doBuildResCheck(), whose result is returned.
 *
 * Errors raised while executing arrive through the jump buffer in runtimeEnv.env; the
 * code delivered that way is stored in pQInfo->code.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  int64_t threadId = taosGetPthreadId();

  int64_t curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId);
  if (curOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) curOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t ret = setjmp(pRuntimeEnv->env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // pick the executor: tag-only queries, super-table queries, plain table queries
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows != 0) {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  } else {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  }

  return doBuildResCheck(pQInfo);
}

6764
/**
 * Wait until the current execution round of a query has produced its result.
 *
 * In the build that is compiled (the #else branch) the call blocks on the `ready`
 * semaphore, then reports *buildRes = true together with the handle's error code;
 * pRspContext is not used on that path. The disabled #if 0 branch is the non-blocking
 * variant that registers pRspContext instead of waiting.
 *
 * @param qinfo        query handle
 * @param buildRes     out: false when the query was already killed, true after the wait
 * @param pRspContext  response context, only used by the disabled branch
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise pQInfo->code
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  bool usable = (pQInfo != NULL) && isValidQInfo(pQInfo);
  if (!usable) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  int32_t code = TSDB_CODE_SUCCESS;

#if 0
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);
  assert(pQInfo->rspContext == NULL);

  if (pQInfo->dataReady == QUERY_RESULT_READY) {
    *buildRes = true;
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    *buildRes = false;
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  }

  code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);
#else
  // block until doBuildResCheck() posts the semaphore at the end of the execution round
  tsem_wait(&pQInfo->ready);
  *buildRes = true;
  code = pQInfo->code;
#endif

  return code;
}
6807

6808
/**
 * Allocate a retrieve response for the query and fill in its header and, when
 * rows are available and the query has no error, the result data.
 *
 * @param qinfo         query handle; cast to SQInfo * and validated with isValidQInfo()
 * @param pRsp          [out] response buffer obtained from rpcMallocCont()
 * @param contLen       [out] total length of the response buffer in bytes
 * @param continueExec  [out] false when the query is killed or QUERY_OVER, true otherwise;
 *                      not written on the early error returns
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or pQInfo->code
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload: result rows, one int32_t, and one STableIdInfo per entry of arrTableIdInfo
  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t) + sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  // todo: handle allocation failure properly; for now this only avoids a crash,
  // the error code cannot be delivered to the client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *rsp = *pRsp;

  // header fields are converted to network / big-endian byte order
  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);
  rsp->useconds  = htobe64(pRuntimeEnv->summary.elapsedTime);
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    rsp->offset = htobe64(pQuery->limit.offset);
  } else {
    rsp->offset = 0;
  }
  rsp->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  const bool finished = IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
  *continueExec = !finished;

  if (finished) {
    rsp->completed = 1;  // notify no more result to client
  } else {
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6861

6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872
/**
 * Tell whether the query has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise 1 when
 *         the query is killed or its status has QUERY_OVER set, 0 if not
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQInfo->runtimeEnv.pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6873
/**
 * Mark the query as killed and wait until pQInfo->owner becomes 0.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, else TSDB_CODE_SUCCESS
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread to stop.
  // Once the query is stopped, the owner of the qHandle is cleared immediately.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906
/**
 * Write one tag value (or the type's NULL representation) into a result buffer.
 *
 * @param output  destination buffer
 * @param val     source value; NULL means "write NULL"
 * @param type    TSDB data type of the value
 * @param bytes   byte width used for non-BINARY/NCHAR types
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarType) {
    // length is taken from the value itself via varDataTLen()
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
6907 6908 6909
/**
 * Fill the query's output buffers (pQuery->sdata) for a tag-only query and set
 * pQuery->rec.rows to the number of rows produced.
 *
 * Three cases, selected by the function id of the first select expression:
 *   - TSDB_FUNC_TID_TAG: one var-data cell per table holding
 *     (int16 0, uid, tid, vgId) followed by the table name or the tag value;
 *   - TSDB_FUNC_COUNT:   a single row holding the number of tables in the group;
 *   - otherwise:         one row per table with the requested tag / tbname columns,
 *     honouring limit.offset and limit.limit.
 *
 * Iteration resumes from pQInfo->tableIndex. Returns without touching anything
 * when there is no table group. The query status is set to QUERY_COMPLETED at
 * the end of every call that gets past that check.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // default to the expression's own type/width; prefer the tag column's if it is listed
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The header fields below land at offsets that are not naturally aligned
      // for their types, so they are stored with memcpy instead of casted stores.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // cap the batch at the buffer capacity, or at the LIMIT when that is smaller
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

7040 7041 7042 7043 7044 7045 7046
/**
 * Return the response context stored in the query handle (pQInfo->rspContext).
 * The handle must not be NULL (asserted).
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;

  assert(pInfo != NULL);
  return pInfo->rspContext;
}

7047 7048 7049 7050 7051 7052 7053
/**
 * Free callback handed to taosCacheInit() in qOpenQueryMgmt().
 *
 * @param qhandle  pointer to a slot holding the query handle; a NULL pointer or an
 *                 empty slot is ignored. Otherwise the query is killed and destroyed.
 */
void freeqinfoFn(void *qhandle) {
  void** ppQInfo = qhandle;

  if (ppQInfo != NULL && *ppQInfo != NULL) {
    qKillQuery(*ppQInfo);
    qDestroyQueryInfo(*ppQInfo);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7058
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7059 7060 7061 7062

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7063
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7064 7065 7066 7067
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7068

S
TD-1530  
Shengliang Guan 已提交
7069
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7070 7071 7072 7073
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7074 7075

  qDebug("vgId:%d, open querymgmt success", vgId);
7076
  return pQueryMgmt;
7077 7078
}

H
Haojun Liao 已提交
7079
/**
 * Per-entry callback passed to taosCacheRefresh() by qQueryMgmtNotifyClosed():
 * kills the query whose handle is stored in the given slot.
 *
 * @param handle  pointer to a slot holding the query handle; a NULL pointer or an
 *                empty slot is ignored, mirroring the guard in freeqinfoFn()
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL || *fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7085 7086 7087 7088 7089 7090 7091
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7092
//  pthread_mutex_lock(&pQueryMgmt->lock);
7093
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7094
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7095

H
Haojun Liao 已提交
7096
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113
}

/**
 * Tear down a query-handle manager: clean up its cache, destroy its mutex and
 * free the manager. The manager must already be marked closed (asserted).
 * A NULL manager is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  // remember the id now; the manager is freed before the final log line
  const int32_t vgId = pQueryMgmt->vgId;
  assert(pQueryMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  taosTFree(pQueryMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

7119
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7120
  if (pMgmt == NULL) {
7121
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7122 7123 7124
    return NULL;
  }

7125
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7126

7127 7128
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7129
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7130
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7131 7132 7133
    return NULL;
  }

H
Haojun Liao 已提交
7134
//  pthread_mutex_lock(&pQueryMgmt->lock);
7135
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7136
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7137
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7138
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7139 7140
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7141 7142
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7143
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7144 7145 7146 7147 7148

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7149
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7150 7151 7152 7153 7154 7155
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7156 7157
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7158 7159 7160 7161 7162 7163 7164
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7165
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7166 7167 7168 7169 7170
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7171
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7172 7173 7174
  return 0;
}

7175