qExecutor.c 246.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
// (1 << 12) - 1 == 4095; judging by the name, the row cap of one result-buffer page
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)

/**
 * Check whether the primary column is loaded by default; otherwise, the program is
 * forced to load the primary column explicitly.
 *
 * NOTE(review): this comment does not describe any of the macros below; it looks like
 * a leftover from code that has since moved -- confirm and relocate or drop.
 */

// true when the status word p has at least one of the bits in s set
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
// true when the query's scan order maps to the ascending forward step
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

// test / set the scan flag kept in the runtime environment
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

// recover the enclosing SQInfo from a pointer to its runtimeEnv member
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

// position reached from (query)->pos after `index` moves of size `step`
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
// flip n in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC (n is evaluated twice)
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

// zero-valued SDataBlockInfo compound literal
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey pair of _src into _dst (both are lvalues, not pointers).
 * The expansion deliberately has no trailing ';' so that the macro behaves like a
 * single statement, e.g. inside an unbraced if/else; the caller supplies the ';'.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
// Query status flags. Each value is a distinct bit, so several can be combined with '|'.
enum {
  // when query starts to execute, this status will be set
  QUERY_NOT_COMPLETED = 0x1u,

  /* result output buffer is full, current query is paused.
   * this status only exists in group-by clause and diff/add/division/multiply/ query.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window, it is also denoted as query_completed
   */
  QUERY_COMPLETED = 0x4u,

  /* when the result is not completed return to client, this status will be
   * usually used in case of interval query with interpolation option
   */
  QUERY_OVER = 0x8u,
};
77 78

// Result codes for comparing timestamps/tags during a ts join (judging by the names).
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Fault-injection allocators (compiled out). When enabled, the #defines at the end of
 * this section route malloc/calloc/realloc in the rest of the file through wrappers
 * that randomly return NULL, which exercises the out-of-memory paths.
 */

// fails when rand() is a multiple of 1000, otherwise forwards to malloc
static UNUSED_FUNC void *u_malloc(size_t size) {
  uint32_t v = rand();

  if (v % 1000 == 0) {
    return NULL;
  } else {
    return malloc(size);
  }
}

// fails when rand() is a multiple of 1000, otherwise forwards to calloc
static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t v = rand();
  if (v % 1000 == 0) {
    return NULL;
  } else {
    return calloc(num, size);
  }
}

// fails when rand() % 5 is 0 or 1, otherwise forwards to realloc
static UNUSED_FUNC void* u_realloc(void* p, size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return realloc(p, size);
  }
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/**
 * Move *tw to the next time window in the scan direction of the query.
 *
 * For every interval unit except 'n' and 'y' the window start is shifted by one
 * sliding step (negated for a descending scan) and the end is start + interval - 1.
 * For 'n'/'y' the start is converted to calendar time and shifted by `interval`
 * months ('y' is treated as 12 months per unit), then converted back.
 *
 * @param pQuery  supplies order, interval settings and time precision
 * @param tw      in: current window; out: next window (ekey is inclusive)
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to seconds: one /1000 always, a second one for microsecond precision
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  // NOTE(review): tm_mday and tm_isdst are carried over from the original instant, so
  // mktime() may normalise a day that does not exist in the target month, or apply a
  // stale DST flag -- confirm that window starts are always month-aligned here.
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->skey = mktime(&tm) * 1000L;

  // the end key is the start of the window that follows, minus one tick (see below)
  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = mktime(&tm) * 1000L;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  tw->ekey -= 1;
}

#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))

// mark / test "all tables visited" by comparing tableIndex with the number of tables
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

H
hjxilinx 已提交
181
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
182
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
183 184
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
187
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
188

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
197 198
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
199

200
/**
 * Evaluate all column filters of the query against the row at elemPos.
 *
 * A row passes only if every filtered column accepts it; a column accepts the row
 * as soon as any one of its filter elements matches.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row index into each filter column's pData buffer
 * @return true if the row satisfies the filters of every column
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      // null handling is decided here so that value filters never see a null element
      bool isnull = isNull(pElem, pFilterInfo->info.type);
      if (isnull) {
        if (pFilterElem->fp == isNull_filter) {
          qualified = true;
          break;
        } else {
          continue;
        }
      } else {
        if (pFilterElem->fp == notNull_filter) {
          qualified = true;
          break;
        } else if (pFilterElem->fp == isNull_filter) {
          continue;
        }
      }

      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/**
 * Return the largest numOfRes found among the output function contexts.
 *
 * @param pRuntimeEnv  runtime environment holding the query and its pCtx array
 * @return the maximum number of results over the considered outputs (>= 0)
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

268 269 270 271 272
/*
 * The number of results needs to be updated because the offset value was updated.
 * Overwrites numOfRes of every output except ts/tag/tagprj/ts_dummy; each overwritten
 * value is asserted to be strictly larger than the new one.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

H
Haojun Liao 已提交
288
// Map a group index to a result id: 20000000 + groupIndex * 10000.
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  int32_t base = 20000000;
  return base + (groupIndex * 10000);
}

/**
 * Tell whether the group-by clause contains a normal (non-tag) column.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return true if any group-by column is flagged as a normal column
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      //make sure the normal column locates at the second position if tbname exists in group by clause
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/**
 * Look up the data type of the first normal column in the group-by clause.
 *
 * @param pQuery        query whose colList is searched by column id
 * @param pGroupbyExpr  group-by expression, must not be NULL
 * @return the column type, or TSDB_DATA_TYPE_NULL when no matching column is found
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // sentinel used when the clause has no normal column
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/**
 * Tell whether the query combines at least one selectivity function with at least
 * one TSDB_FUNC_TAG_DUMMY / TSDB_FUNC_TS_DUMMY output.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

360 361 362 363 364 365 366 367 368 369 370
// True only when every output expression is TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ
// (a query with no outputs therefore also yields true).
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    isProjection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);

    if (!isProjection) {
      return false;
    }

    idx += 1;
  }

  return true;
}

371
// True when the first output expression is the TSDB_FUNC_TS_COMP function.
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFunctionId = pQuery->pSelectExpr[0].base.functionId;
  return firstFunctionId == TSDB_FUNC_TS_COMP;
}
372

373 374 375
/**
 * Enforce the LIMIT clause on the rows about to be returned.
 *
 * When total + rows would exceed the limit, rows is trimmed so that the overall
 * total equals the limit and the query is marked QUERY_COMPLETED.
 *
 * @return true if the result was trimmed, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

// True when any output expression is TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM.
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
/*
 * Tell whether the query outputs a tag value: either its single output is ts_comp
 * (the ts_comp column requires the tag value for the join filter), or one of its
 * output expressions refers to a tag column (the tag by which results are aggregated).
 */
static bool hasTagValOutput(SQuery* pQuery) {
  bool onlyTsComp = (pQuery->numOfOutput == 1) &&
                    (pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP);
  if (onlyTsComp) {
    return true;
  }

  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[idx].base.colInfo.flag)) {
      return true;
    }

    idx += 1;
  }

  return false;
}

423 424 425 426 427 428 429 430
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
431
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
432
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
433 434
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
435 436
  } else {
    *pColStatis = NULL;
437
  }
438

H
Haojun Liao 已提交
439
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
440 441 442
    return false;
  }

443 444 445
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
446

447 448 449 450
  return true;
}

/**
 * Find the window result registered under the given key, creating it when missing.
 *
 * On a hash hit curIndex is set to the stored slot. On a miss a new slot is appended
 * (growing the pResult array when full) and registered in the hash list -- but only
 * during the master scan.
 *
 * Failures are reported with longjmp(pRuntimeEnv->env, ...):
 * TSDB_CODE_QRY_OUT_OF_MEMORY when growing fails, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW
 * when size exceeds MAX_INTERVAL_TIME_WINDOW.
 *
 * @param pData       key bytes
 * @param bytes       key length
 * @param masterscan  whether new windows may be created
 * @return the window result, or NULL when the key is unknown and masterscan is false
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCapacity = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // the truncated product does not grow for capacities 0 and 1; always make room
      // for at least one more slot, otherwise the slot taken below is out of bounds
      if (newCapacity <= pWindowResInfo->capacity) {
        newCapacity = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCapacity * sizeof(SWindowResult)));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // account for the growth only once the allocation has succeeded
      pRuntimeEnv->summary.winInfoSize += (newCapacity - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCapacity - pWindowResInfo->capacity);

      pWindowResInfo->pResult = (SWindowResult *)t;

      int32_t inc = (int32_t)newCapacity - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.winInfoSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCapacity; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCapacity;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the time window that contains the handled timestamp.
 *
 * The candidate window is either rebuilt from prevSKey (no current window yet) or
 * taken from the current window result. If ts lies outside the candidate, the window
 * is recomputed: by truncating ts for 'n'/'y' interval units, or by shifting the start
 * in whole sliding steps otherwise. The returned ekey is inclusive.
 *
 * @param pWindowResInfo  window result set (curIndex, prevSKey)
 * @param ts              timestamp being processed
 * @param pQuery          supplies interval settings, precision, order and query range
 * @return the window covering ts; for ascending queries ekey is capped at window.ekey
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // ekey is inclusive, hence the -1, exactly as in every other branch of this function
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SWindowResult* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // move the start back by whole sliding steps until it is not after ts
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // move the start forward by whole sliding steps until the window reaches ts
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

/**
 * Assign a (page, row) position in the disk-based result buffer to a window result
 * that does not have one yet.
 *
 * The last page of the group `sid` is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when the group has no page) a new page is
 * requested.
 *
 * @return 0 on success or when the window already has a page, -1 when no page
 *         could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    if (pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->rowId = (int32_t)(pData->num++);

    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
604
                                       STimeWindow *win, bool masterscan, bool* newWind) {
605 606
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
607

608 609
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
610
  if (pWindowRes == NULL) {
611 612 613
    *newWind = false;

    return masterscan? -1:0;
614
  }
615

616
  *newWind = true;
H
Haojun Liao 已提交
617

618
  // not assign result buffer yet, add new result buffer
619
  if (pWindowRes->pageId == -1) {
620
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
621
    if (ret != TSDB_CODE_SUCCESS) {
622 623 624
      return -1;
    }
  }
625

626
  // set time window for current result
627
  pWindowRes->win = (*win);
628

H
Haojun Liao 已提交
629
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
630 631 632
  return TSDB_CODE_SUCCESS;
}

633
// Return the `closed` flag of the window result in the given slot (0 <= slot < size).
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);
  return pWindowResInfo->pResult[slot].closed;
}

H
Haojun Liao 已提交
638
/**
 * Count how many rows, starting at pos and moving in scan order, belong to the
 * window that ends at ekey.
 *
 * @param numOfRows  number of timestamps in pData
 * @param searchFn   search routine used to locate ekey in the timestamp array
 * @param ekey       end key of the window
 * @param pos        index of the first row to consider
 * @param order      TSDB_ORDER_ASC scans towards higher indices, anything else towards 0
 * @param pData      timestamp column of the block
 * @return the number of rows to step over; asserted to be positive
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    // search the tail [pos, numOfRows); `end` is relative to pos
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      // a row exactly at ekey still belongs to the window
      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    // search the head [0, pos]; `end` is an absolute index
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
669
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
670
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
671
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
672
    return pWindowResInfo->size;
673
  }
674

675
  // no qualified results exist, abort check
676
  int32_t numOfClosed = 0;
677

678
  if (pWindowResInfo->size == 0) {
679
    return pWindowResInfo->size;
680
  }
681

682
  // query completed
H
hjxilinx 已提交
683 684
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
685
    closeAllTimeWindow(pWindowResInfo);
686

687 688 689 690
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
691
    int64_t skey = TSKEY_INITIAL_VAL;
692

693 694
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
695
      if (pResult->closed) {
696
        numOfClosed += 1;
697 698
        continue;
      }
699

700
      TSKEY ekey = pResult->win.ekey;
701
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
702
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
703 704
        closeTimeWindow(pWindowResInfo, i);
      } else {
705
        skey = pResult->win.skey;
706 707 708
        break;
      }
    }
709

710
    // all windows are closed, set the last one to be the skey
711
    if (skey == TSKEY_INITIAL_VAL) {
712 713 714 715 716
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
717

718
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].win.skey;
719

720 721
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
722
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
723
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
724

725
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
726
    } else {
727
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
728
             numOfClosed);
729 730
    }
  }
731

732 733 734 735 736
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
737

738
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
739
  return numOfClosed;
740 741 742
}

/**
 * Count the rows of the data block, starting at startPos in scan order, that fall
 * into the window ending at ekey; optionally advance the table's lastKey.
 *
 * @param pDataBlockInfo  block description (rows, window)
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        first row to consider, 0 <= startPos < rows
 * @param ekey            end key of the window (in scan direction)
 * @param searchFn        search routine handed to getForwardStepsInBlock
 * @param updateLastKey   when true, pQuery->current->lastKey is set one step past the
 *                        last counted row (or past the block border)
 * @return number of rows in the window; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num   = -1;
  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      // the window ends inside this block
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      // the window covers the rest of the block
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

H
Haojun Liao 已提交
782 783
/**
 * Apply every output function to a run of rows of the current block in one call each.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param closed       whether the target window is closed
 * @param pWin         window being filled; its skey becomes nStartQueryTimestamp
 * @param offset       position of the first row in scan order
 * @param forwardStep  number of rows to process
 * @param tsCol        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      // remember this context's own flag: it is restored after the call, and using
      // the flag of pCtx[0] for every context would overwrite differing values
      bool hasPrev = pCtx[k].preAggVals.isSet;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      // for a descending scan the run ends at `offset`, so it starts forwardStep-1 rows earlier
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      // NOTE: the original value of isSet have been changed here
      if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }

      // restore it
      pCtx[k].preAggVals.isSet = hasPrev;
    }
  }
}

816
/*
 * Invoke the per-row variant (xFunctionF) of every output function for the row at the given offset.
 * Nothing is executed unless this is the master scan or the caller reports the window as closed.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];
    int32_t         funcId = pQuery->pSelectExpr[k].base.functionId;

    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    if (!functionNeedToExecute(pRuntimeEnv, pFuncCtx, funcId)) {
      continue;
    }

    aAggs[funcId].xFunctionF(pFuncCtx, offset);
  }
}

H
Haojun Liao 已提交
832 833
/*
 * Advance pNext with GET_NEXT_TIMEWINDOW and locate the first row of the current block that
 * belongs to it.
 *
 * @param prevPosition  row position where the previous window ended, or -1 if unknown
 * @return -1 if the advanced window lies completely outside the current block, otherwise the
 *         position of the first candidate row. When that row is outside of the advanced window,
 *         pNext is moved further so that it covers the timestamp of that row.
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  bool outOfBlock = ascending ? (pNext->skey > pDataBlockInfo->window.ekey)
                              : (pNext->ekey < pDataBlockInfo->window.skey);
  if (outOfBlock) {
    return -1;
  }

  // the search key is the leading edge of the window, clipped by the start key of the query range
  TSKEY startKey = ascending ? pNext->skey : pNext->ekey;
  if (ascending) {
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else if (startKey > pQuery->window.skey) {
    startKey = pQuery->window.skey;
  }

  int32_t startPos = 0;
  bool    tumbling = (pQuery->interval.sliding == pQuery->interval.interval);

  // tumbling time window query, a special case of sliding time window query:
  // the next window starts right after the position where the previous one ended
  if (tumbling && prevPosition != -1) {
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  TSKEY next = primaryKeys[startPos];
  bool  missed = ascending ? (next > pNext->ekey) : (next < pNext->skey);

  if (missed) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // month/year units: recompute the window from the timestamp itself
      pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
      pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else if (ascending) {
      // shift forward by a whole number of sliding steps
      pNext->ekey += ((next - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
    } else {
      // shift backward by a whole number of sliding steps
      pNext->skey -= ((pNext->skey - next + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
    }
  }

  return startPos;
}

H
Haojun Liao 已提交
893
/*
 * Return the trailing edge of the given time window in scan direction, clipped so that it does
 * not pass the end key of the query range: the window ekey for ascending queries and the window
 * skey for descending queries.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY limit = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > limit) ? limit : pWindow->ekey;
  }

  return (pWindow->skey < limit) ? limit : pWindow->skey;
}

H
hjxilinx 已提交
910 911
/*
 * Look up the data buffer of the column with the given column id in the data block.
 * Returns NULL if no column of the block carries that id.
 *
 * todo binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
925
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
926 927 928
  if (pDataBlock == NULL) {
    return NULL;
  }
929

H
Haojun Liao 已提交
930
  char *dataBlock = NULL;
H
Haojun Liao 已提交
931
  SQuery *pQuery = pRuntimeEnv->pQuery;
932

933
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
934
  if (functionId == TSDB_FUNC_ARITHM) {
935
    sas->pArithExpr = &pQuery->pSelectExpr[col];
936

937 938 939 940
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
941

H
Haojun Liao 已提交
942 943 944 945
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

946
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
947
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
948
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
949
      SColumnInfo *pColMsg = &pQuery->colList[i];
950

951 952 953 954 955 956 957 958
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
959

960
      assert(dataBlock != NULL);
961
      sas->data[i] = dataBlock;  // start from the offset
962
    }
963

964
  } else {  // other type of query function
965
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
966
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
967 968 969 970 971
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
972 973
    } else {
      dataBlock = NULL;
974 975
    }
  }
976

977 978 979 980
  return dataBlock;
}

/**
H
Haojun Liao 已提交
981
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
982 983
 * @param pRuntimeEnv
 * @param forwardStep
984
 * @param tsCols
985 986 987 988 989
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
990
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
991 992
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
993
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
994 995
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

996 997
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
998
  if (pDataBlock != NULL) {
999
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
1000
    tsCols = (TSKEY *)(pColInfo->pData);
1001
  }
1002

1003
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1004 1005 1006
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1007

H
Haojun Liao 已提交
1008
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1009
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1010
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1011
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1012
  }
1013

1014
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1015
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1016
    TSKEY ts = TSKEY_INITIAL_VAL;
1017

H
Haojun Liao 已提交
1018 1019 1020 1021 1022 1023 1024 1025
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1026
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1027 1028
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
1029
      taosTFree(sasArray);
H
hjxilinx 已提交
1030
      return;
1031
    }
1032

H
Haojun Liao 已提交
1033 1034 1035
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1036
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1037
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1038
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1039

1040
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1041
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1042
    }
1043

1044 1045
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1046

1047
    while (1) {
H
Haojun Liao 已提交
1048 1049
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1050 1051 1052
      if (startPos < 0) {
        break;
      }
1053

1054
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1055
      hasTimeWindow = false;
H
Haojun Liao 已提交
1056 1057
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1058 1059
        break;
      }
1060

1061 1062 1063 1064 1065
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1066
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1067

1068 1069
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1070
    }
1071

1072 1073 1074 1075 1076 1077 1078
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1079
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1080
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1081 1082 1083 1084 1085
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1086

1087 1088 1089 1090
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1091

S
Shengliang Guan 已提交
1092
    taosTFree(sasArray[i].data);
1093
  }
1094

S
Shengliang Guan 已提交
1095
  taosTFree(sasArray);
1096 1097 1098 1099 1100 1101
}

/*
 * Select the result buffer of the group identified by the given group-by column value and make
 * it the current output buffer of all function contexts.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        value of the group-by column of the current row
 * @param type         data type of the value
 * @param bytes        width of the value in bytes (the actual length is used for binary/nchar)
 * @return TSDB_CODE_SUCCESS on success; -1 if the value is null, no window result could be
 *         obtained for the group, or no result buffer page could be added
 *
 * Jumps to pRuntimeEnv->env with TSDB_CODE_QRY_APP_ERROR for float/double group columns and with
 * TSDB_CODE_QRY_OUT_OF_MEMORY if the copy of a binary/nchar key cannot be allocated.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // not assign result buffer yet, add new result buffer
  char* d = pData;
  int16_t len = bytes;
  if (type == TSDB_DATA_TYPE_BINARY||type == TSDB_DATA_TYPE_NCHAR) {
    // variable length value: the payload, not the length header, identifies the group
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float/binary/nchar columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true);
  if (pWindowRes == NULL) {
    return -1;
  }

  int64_t v = -1;
  switch(type) {
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
    case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
    case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
    case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
  }

  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    // NOTE(review): a new copy of the key is allocated on every call; if the window result
    // returned above may already carry a key, the previous copy is lost - confirm against
    // doSetTimeWindowFromKey and the code that releases the window results.
    pWindowRes->key = malloc(varDataTLen(pData));
    if (pWindowRes->key == NULL) {
      // never write through a failed allocation, abort the query like other OOM cases in this file
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    varDataCopy(pWindowRes->key, pData);
  } else {
    // numeric group value: skey and ekey of the window both carry the value
    pWindowRes->win.skey = v;
    pWindowRes->win.ekey = v;
  }

  assert(pRuntimeEnv->windowResInfo.interval == 0);

  if (pWindowRes->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1156
/*
 * Find the data buffer of the first non-tag group-by column that is present in the data block.
 *
 * *type and *bytes are set to the schema of each non-tag group-by column that is examined, so on
 * return they describe the last examined column. Returns NULL if no such column is found in the
 * block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, k);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // locate the group column in the column list of the query
    int32_t wantedId = pGroupCol->colId;
    int16_t slot = -1;

    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pQuery->colList[i].colId != wantedId) {
        continue;
      }

      slot = i;
      break;
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * the column index is acquired from the first table of all qualified tables in this vnode
     * during the query prepare stage, the remaining tables may not have the required column in
     * cache actually. So the block is searched by column id instead of using the index.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);
    int32_t pos = 0;

    while (pos < numOfBlockCols) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, pos);
      if (pGroupCol->colId == pColData->info.colId) {
        return pColData->pData;
      }

      pos += 1;
    }
  }

  return NULL;
}

/*
 * Compare the row at the given offset with the current element of the timestamp buffer.
 *
 * @return TS_JOIN_TAG_NOT_EQUALS if the tag of the first context differs from the tag of the
 *         element, TS_JOIN_TS_NOT_EQUALS if the timestamp of the row is still behind the element
 *         in scan direction, TS_JOIN_TS_EQUAL otherwise. A row timestamp ahead of the element
 *         trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  // the input buffer of the first context is read as the timestamp column
  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is handed to tVariantCompare as a pointer above, so it is dereferenced with '->'
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag->i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the given function has to be invoked for the current scan.
 *
 *  - TSDB_FUNC_TS always runs.
 *  - Functions whose result is complete and the dummy tag/ts functions never run.
 *  - first/first_dst run only for ascending queries.
 *  - last/last_dst run only if param[0] of the context equals the order of the query.
 *  - Everything else runs unless this is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  SResultInfo *pResInfo = GET_RES_INFO(pCtx);

  bool dummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (pResInfo->complete || dummy) {
    return false;
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  if (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, none of the remaining functions need to be executed
  return IS_REVERSE_SCAN(pRuntimeEnv) ? false : true;
}

1263 1264
/**
 * Apply the output functions of the query to the current data block one row at a time.
 *
 * Each row is first checked against the timestamp buffer (if any) and the column filters (if
 * any). A qualified row is then fed to the functions of every time window it belongs to (interval
 * queries), or to the functions of the group selected by its group-by column value, or simply to
 * all functions.
 *
 * After the loop the last key of the current table is set from the last visited row, and the
 * cursor of the timestamp buffer is saved.
 *
 * Jumps to pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY if the arithmetic support array
 * cannot be allocated.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  // the first column only serves as timestamp column if it actually has the timestamp type
  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
  bool    groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyColumnValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t j = 0;
  int32_t offset = -1;

  for (j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    // interval window query, decide the time window according to the primary timestamp
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      if (!hasTimeWindow) {
        continue;
      }

      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);

      // remember the index of the active window, it is restored after the following windows are handled
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      // feed the row to every following window that still contains its timestamp
      while (1) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyColumnValue) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
        break;
      }
    }
  }

  // offset is only assigned inside the loop, so this requires at least one visited row
  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    item->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    item->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }

  // todo refactor: extract method
  // only arithmetic expressions own a data pointer array (see getDataBlock)
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }

    taosTFree(sasArray[i].data);
  }

  // released the same way as the entries above and as in blockwiseApplyFunctions
  taosTFree(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1429
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1430
  SQuery *pQuery = pRuntimeEnv->pQuery;
1431

H
hjxilinx 已提交
1432 1433
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1434

H
Haojun Liao 已提交
1435
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1436
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1437
  } else {
1438
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1439
  }
1440

1441
  // update the lastkey of current table
1442
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1443
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1444

1445
  // interval query with limit applied
1446
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1447
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1448 1449
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1450
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1451

1452 1453 1454 1455
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1456

1457 1458 1459
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1460

1461 1462 1463
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1464 1465 1466 1467 1468

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1469
    }
1470
  }
1471

1472
  return numOfRes;
1473 1474
}

H
Haojun Liao 已提交
1475
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1476
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1477

1478 1479
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1480

1481
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1482
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1483
  pCtx->aInputElemBuf = inputData;
1484

1485
  if (tpField != NULL) {
H
Haojun Liao 已提交
1486
    pCtx->preAggVals.isSet  = true;
1487 1488
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1489 1490 1491
  } else {
    pCtx->preAggVals.isSet = false;
  }
1492

H
Haojun Liao 已提交
1493 1494
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1495 1496
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1497

H
Haojun Liao 已提交
1498
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1499 1500
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1501

1502 1503
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1504
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1505
  }
1506

1507 1508 1509 1510 1511
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1512
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1513
    /*
H
Haojun Liao 已提交
1514
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1515 1516 1517 1518 1519 1520 1521 1522 1523 1524
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1525

1526 1527
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1528 1529 1530 1531 1532 1533
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1534 1535
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1536
    pInterpInfo->type = (int8_t)pQuery->fillType;
1537 1538
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1539

1540 1541 1542 1543
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1544 1545 1546
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1547 1548
      }
    }
H
Haojun Liao 已提交
1549 1550 1551
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1552
  }
1553

1554 1555 1556 1557 1558 1559
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1560
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1561 1562 1563
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1564
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1565 1566 1567 1568 1569 1570
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1571
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1572 1573
  SQuery* pQuery = pRuntimeEnv->pQuery;

1574
  if (isSelectivityWithTagsQuery(pQuery)) {
1575
    int32_t num = 0;
1576
    int16_t tagLen = 0;
1577

1578
    SQLFunctionCtx *p = NULL;
1579
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1580 1581 1582
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1583

1584
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1585
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1586

1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1600 1601 1602 1603 1604
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1605
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1606
    }
1607
  }
H
Haojun Liao 已提交
1608 1609

  return TSDB_CODE_SUCCESS;
1610 1611
}

H
Haojun Liao 已提交
1612 1613
/*
 * Hand out consecutive slices of `buf` to the per-output result info entries: output i receives
 * pQuery->pSelectExpr[i].interBytes bytes, starting right behind the slice of output i - 1.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char *cursor = buf;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t interBytes = pQuery->pSelectExpr[i].interBytes;

    setResultInfoBuf(&pResultInfo[i], interBytes, isStableQuery, cursor);
    cursor += interBytes;
  }
}

1622
/*
 * Allocate and initialize the runtime environment of a query.
 *
 * One zero-initialized SQLFunctionCtx is created per output column and filled from the matching
 * select expression (input/output type and size, scan order, function id and parameters). A
 * single buffer holding the SResultInfo array followed by pRuntimeEnv->interBufSize bytes of
 * intermediate result space is allocated and distributed by setWindowResultInfo().
 *
 * order: stored as param[2] of every top/bottom/diff function context.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation or
 * setCtxTagColumnInfo() fails. On failure everything allocated here is released, including the
 * function parameters already created for the contexts.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // the SResultInfo array is followed by the intermediate result buffers in the same allocation
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the "require null" request travels in the column flag; record it and strip it from the flag
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type/size from the tag schema, the user-defined constant or the column schema
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the first output column to be the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // byte offset of output column i within one result row
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the parameters created above before dropping the contexts: once pCtx has been freed
  // here, teardownQueryRuntimeEnv() no longer visits them. The context array is zero-initialized
  // by calloc, so contexts that were never filled have numOfParams == 0.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release the resources held by the runtime environment: the time window info, the function
 * contexts with their parameters, tag value and tag context list, the result info buffer, the
 * fill info, the result buffer, both query handles and the ts buffer.
 * Nothing is done when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  qDebug("QInfo:%p teardown runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  // the result info buffer is only released together with the function contexts
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[i];

      int32_t j = 0;
      while (j < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[j]);
        ++j;
      }

      tVariantDestroy(&pFuncCtx->tag);
      taosTFree(pFuncCtx->tagInfo.pTagCtxList);
    }

    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1769
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1770

H
Haojun Liao 已提交
1771
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;}
H
hjxilinx 已提交
1772

H
Haojun Liao 已提交
1773 1774 1775
/*
 * Decide whether the query is treated as a fixed output query.
 *
 * Interval queries never are; top/bottom and group-by-normal-column queries always are. Otherwise
 * the query qualifies as soon as one output function is not flagged as multi-output, ignoring a
 * leading projection of the primary timestamp column and the ts / ts_dummy functions.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note: top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[i].base;

    // the first output being a single-parameter projection of the primary timestamp is ignored
    bool leadingTsProjection = (i == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (!leadingTsProjection && !tsFunction && !IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1805
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1806
static bool isPointInterpoQuery(SQuery *pQuery) {
1807
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1808
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1809
    if (functionID == TSDB_FUNC_INTERP) {
1810 1811 1812
      return true;
    }
  }
1813

1814 1815 1816 1817
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1818
static bool isSumAvgRateQuery(SQuery *pQuery) {
1819
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1820
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1821 1822 1823
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1824

1825 1826 1827 1828 1829
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1830

1831 1832 1833
  return false;
}

H
hjxilinx 已提交
1834
// Returns true if at least one output column is computed by TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++i;
  }

  return false;
}

H
hjxilinx 已提交
1845
/*
 * Returns true when an output function asks for a scan direction that differs from the order of
 * the query: first / first_dst in a query that is not ascending, or last / last_dst whose first
 * argument (the scan order needed to acquire the last result) differs from pQuery->order.order.
 * ts, ts_dummy and tag outputs are not inspected.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[i].base;
    int32_t      functionId = pFunc->functionId;

    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (functionId == TSDB_FUNC_FIRST) || (functionId == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (functionId == TSDB_FUNC_LAST) || (functionId == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the scan order to acquire the last result of the specified column
      int32_t requiredOrder = (int32_t)pFunc->arg->argValue.i64;
      if (requiredOrder != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
1867

H
Haojun Liao 已提交
1868 1869 1870 1871
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1872 1873
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1874 1875 1876
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1877 1878 1879 1880

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1881
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1882 1883 1884
      return false;
    }
  }
1885

H
hjxilinx 已提交
1886 1887 1888
  return true;
}

1889 1890
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1891
/*
 * Compute the time window that contains `key`.
 *
 * win->skey is `key` truncated by taosTimeTruncate() according to the query interval and
 * precision. win->ekey is the last timestamp of that window: start + interval - 1, computed
 * through taosTimeAdd() for the 'n' and 'y' interval units, or INT64_MAX when keyFirst is so
 * close to INT64_MAX that adding the interval would exceed it.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);

  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * if the realSkey > INT64_MAX - pQuery->interval.interval, the query duration between
   * realSkey and realEkey must be less than one interval. Therefore, no need to adjust the query ranges.
   */
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom and group-by-normal-column queries. For any other query it is 1
 * only when at least one output other than ts / ts_dummy exists and every such output function
 * is flagged as multi-output.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  int32_t checkBuffer = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    // the first function that is not multi-output settles the result
    if (!IS_MULTIOUTPUT(aAggs[functionId].nStatus)) {
      checkBuffer = 0;
      break;
    }

    checkBuffer = 1;
  }

  pQuery->checkBuffer = checkBuffer;
}

/*
 * todo add more parameters to check soon..
 */
1935
bool colIdCheck(SQuery *pQuery) {
1936 1937
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1938
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1939
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1940 1941 1942
      return false;
    }
  }
1943

1944 1945 1946 1947 1948 1949
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
//
// Returns true if every output column, apart from the ts / ts_dummy / tag / tag_dummy ones, is
// computed by either functId or functIdDst.
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;

    bool ignorable = (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TS_DUMMY) ||
                     (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAG_DUMMY);
    bool expected = (functionId == functId) || (functionId == functIdDst);

    if (!ignorable && !expected) {
      return false;
    }
  }

  return true;
}

// true if the query consists of first / first_dst functions only, see onlyOneQueryType
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// true if the query consists of last / last_dst functions only, see onlyOneQueryType
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1970
// todo refactor, add iterator
1971 1972
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1973
  for(int32_t i = 0; i < t; ++i) {
1974
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1975 1976 1977

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1978
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1979

1980 1981 1982 1983
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1984 1985 1986 1987
    }
  }
}

1988
/*
 * Adjust the scan order (and, where necessary, the query time range) to the functions of the query.
 *
 *  - last_row query: ascending order, the range is normalized to skey <= ekey.
 *  - group by normal column in descending order: ascending order, the range is normalized and the
 *    last keys of the tables are adjusted.
 *  - point interpolation query without interval: ascending order.
 *  - query made of first functions only: ascending order; query made of last functions only:
 *    descending order. For interval queries this applies to super table queries only.
 *
 * Whenever the range is exchanged because the order flips, the last keys of the tables are
 * adjusted through doExchangeTimeWindow(), except for the interpolation case.
 *
 * pQueryMsg is not used. stableQuery tells whether the query runs on a super table.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // Format shared by all "order changed" logs below. The QInfo pointer is passed directly:
  // GET_QINFO_ADDR() must only be applied to the address of the runtime environment, applying it
  // to a query pointer yields a meaningless address.
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Initial number of pages for the query: 128 for a group-by-normal-column query, the number of
 * tables (but at least 16) for an interval query, and 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  int32_t numOfPages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = (int32_t)(MAX(numOfTables, minPagesForInterval));
  } else {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(numOfPages > 0);
  return numOfPages;
}

2098 2099
/*
 * Compute the layout of the intermediate result buffer.
 *
 * *rowsize receives pQuery->rowSize scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT(). *ps receives the
 * page size: DEFAULT_INTERN_BUF_PAGE_SIZE, doubled until a page can hold at least 4 rows besides
 * the tFilePage header. The resulting number of rows per page is stored in the runtime env.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t minRowsPerPage = 4;
  int32_t headerSize = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // grow the page until the minimum number of rows fits behind the page header
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * minRowsPerPage) > (*ps) - headerSize; *ps = (*ps << 1u)) {
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2115
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2116

H
Haojun Liao 已提交
2117 2118 2119 2120
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
2121 2122 2123 2124 2125
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
2126

H
Haojun Liao 已提交
2127 2128 2129 2130 2131 2132 2133 2134
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

2135
    // no statistics data, load the true data block
H
Haojun Liao 已提交
2136
    if (index == -1) {
H
Haojun Liao 已提交
2137
      return true;
2138
    }
2139

2140
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
2141
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
H
Haojun Liao 已提交
2142
      return true;
2143
    }
2144

2145
    // all data in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
2146
    if (pDataStatis[index].numOfNull == numOfRows) {
2147 2148 2149 2150 2151 2152 2153 2154 2155

      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
        if (pFilterElem->fp == isNull_filter) {
          return true;
        }
      }

2156 2157
      continue;
    }
2158

H
Haojun Liao 已提交
2159 2160 2161
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
S
TD-1057  
Shengliang Guan 已提交
2162 2163
      float minval = (float)(*(double *)(&pDataBlockst->min));
      float maxval = (float)(*(double *)(&pDataBlockst->max));
2164

2165 2166 2167 2168 2169 2170 2171
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
2172
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
2173 2174 2175 2176 2177
          return true;
        }
      }
    }
  }
2178

H
Haojun Liao 已提交
2179 2180 2181 2182 2183 2184 2185 2186
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
2187

H
Haojun Liao 已提交
2188
  return false;
2189 2190
}

H
Haojun Liao 已提交
2191 2192 2193 2194 2195 2196 2197 2198
/*
 * Check whether the data block is cut by a time window boundary, walking the windows in the scan
 * direction of the query.
 *
 * Returns true when the window aligned to the leading key of the block ends (ascending scan) or
 * starts (descending scan) inside the block, or when one of the following windows obtained via
 * GET_NEXT_TIMEWINDOW() has its near boundary inside the block. Returns false otherwise.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  TSKEY blockSkey = pBlockInfo->window.skey;
  TSKEY blockEkey = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockSkey, sk, ek, &w);
    assert(w.ekey >= blockSkey);

    if (w.ekey < blockEkey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.skey > blockEkey) {
        return false;
      }

      assert(w.ekey > blockEkey);
      if (w.skey <= blockEkey && w.skey > blockSkey) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, blockEkey, sk, ek, &w);
  assert(w.skey <= blockEkey);

  if (w.skey > blockSkey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &w);
    if (w.ekey < blockSkey) {
      break;
    }

    assert(w.skey < blockSkey);
    if (w.ekey < blockEkey && w.ekey >= blockSkey) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
2240
/*
 * Load the statistics and/or the data of the current block, depending on what the query needs.
 *
 * *status receives the load decision:
 *   BLK_DATA_NO_NEEDED     - nothing is loaded, the block is counted as discarded
 *   BLK_DATA_STATIS_NEEDED - only the block statistics are loaded; the data is loaded as well if
 *                            the block carries no statistics
 *   BLK_DATA_ALL_NEEDED    - statistics and data are loaded
 *   BLK_DATA_DISCARD       - the statistics show that the filters reject the whole block
 *
 * Everything is needed when the query has column filters or a ts buffer, or when an interval
 * query has a time window boundary inside the block. Otherwise the requirements reported by the
 * dataReqFunc of every output function are combined.
 *
 * *pStatis and *pDataBlock receive the loaded statistics / column data.
 * Returns TSDB_CODE_SUCCESS, or terrno when the data block could not be retrieved.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // combine the requirements of all output functions, stop once everything is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // report a failed load the same way as the full-load branch below
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2325
/*
 * Binary search in the TSKEY array stored at pValue, which holds `num` entries.
 *
 * order == TSDB_ORDER_DESC: returns the position of `key` when it is found. Otherwise the result
 * is the last position for a key beyond the last entry, or the position just before the current
 * search range (possibly -1) as soon as the key is smaller than the first entry of that range.
 *
 * order == TSDB_ORDER_ASC: returns the position of `key` when it is found. Otherwise the result
 * is the first position of the current search range as soon as the key is not larger than its
 * first entry, or the position following the range (-1 if that is outside the array) as soon as
 * the key is larger than its last entry.
 *
 * Returns -1 for an empty array.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }

      mid = lo + ((hi - lo + 1) >> 1);

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        break;
      }
    }

    return mid;
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }

    if (key == keys[hi]) {
      return hi;
    }

    if (key > keys[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    mid = lo + ((hi - lo + 1) >> 1);

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      break;
    }
  }

  return mid;
}

2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400
/*
 * Grow every output column buffer of the query so that it can hold `capacity` rows.
 *
 * Nothing is done when `capacity` is smaller than the current capacity. Each
 * column buffer is reallocated to `bytes * capacity + sizeof(tFilePage)` bytes and
 * the matching function context is pointed at the start of the column data.
 *
 * On allocation failure the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY. Columns that were already
 * reallocated stay valid (they are only larger than the recorded capacity).
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // do the multiplication in size_t: the int32_t product may overflow for wide columns and large capacities
    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2416 2417 2418
/*
 * Make sure the output buffers can take all rows of the data block described by
 * pBlockInfo in addition to the rows already produced.
 *
 * Only applies when the query is not an interval query, not a group-by-normal-column
 * query, not a fixed-output query and not a ts-comp query (in case of prj/diff
 * query). When the free space (capacity - rows) is smaller than the block's row
 * count, every column buffer is enlarged by exactly the missing number of rows, the
 * area behind the existing rows is zeroed, and each function context is pointed at
 * the first free row. For TOP/BOTTOM/DIFF the timestamp output buffer is set to the
 * output buffer of column 0.
 *
 * On allocation failure the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (!(pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows)) {
    return;  // enough room left for the whole block
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // do the multiplication in size_t: the int32_t product may overflow for wide columns and many rows
    size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that are already in the buffer
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475
/*
 * Initialise the first time window of an interval query from the given data block.
 *
 * Has an effect only for interval queries whose windowResInfo.prevSKey still holds
 * TSKEY_INITIAL_VAL. The aligned window is computed with getAlignQueryTimeWindow();
 * its start key becomes prevSKey. startTime is the aligned start key for ascending
 * queries and the start key of the query window otherwise.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->windowResInfo.prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;
  STimeWindow     aligned = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    TSKEY blockStart = pBlockInfo->window.skey;

    getAlignQueryTimeWindow(pQuery, blockStart, blockStart, pQuery->window.ekey, &aligned);
    pInfo->startTime = aligned.skey;
  } else {
    // descending scan: align against the end key of the block, which is visited first
    TSKEY blockEnd = pBlockInfo->window.ekey;

    getAlignQueryTimeWindow(pQuery, blockEnd, pQuery->window.ekey, blockEnd, &aligned);
    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = aligned.skey;
}

2476 2477
/*
 * Walk through all data blocks delivered by the query handle of the current scan
 * (primary handle for the master scan, secondary handle otherwise) and apply the
 * query functions to each of them.
 *
 * The loop stops when no block is left, when loading a block fails, or when the
 * query status reports a full result buffer or completion. A killed query or a
 * non-success terrno after the loop leaves the function through longjmp() on
 * pRuntimeEnv->env. If the result buffer is not full afterwards, the query is
 * marked as completed; for interval queries in the master scan all time windows are
 * then closed.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *summary = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         pQInfo, pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQueryHandle, &blockInfo, &pStatis,
                                         &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped entirely: move lastKey just beyond the block in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    summary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, pQInfo,
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

      closeAllTimeWindow(pWindowResInfo);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;  // point to the last time window
    } else {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2556
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2557
  tVariantDestroy(tag);
2558

2559
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2560
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2561
    assert(val != NULL);
2562

H
[td-90]  
Haojun Liao 已提交
2563
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2564
  } else {
2565
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2566 2567 2568 2569
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2570

H
hjxilinx 已提交
2571
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2572
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2573 2574 2575 2576
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2577
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2578
    } else {
H
Haojun Liao 已提交
2579 2580 2581 2582 2583
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2584
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2585
    }
2586
  }
2587 2588
}

2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600
/*
 * Linear lookup of a tag column by its column id.
 *
 * Returns a pointer to the first entry of pTagColList[0..numOfTags) whose colId
 * equals `colId`, or NULL when there is no such entry. The list must be non-NULL
 * and non-empty (asserted).
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo* pCol = pTagColList;
  for (int32_t remaining = numOfTags; remaining > 0; --remaining, ++pCol) {
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2601
/*
 * Fill the tag variants of the function contexts with the tag values of pTable.
 *
 * - Single-output TSDB_FUNC_TS_COMP query: only the tag column named by the first
 *   expression's argument is loaded into pCtx[0].tag.
 * - Otherwise: every output expression whose column is flagged as a tag gets its
 *   tag value loaded. In addition, if the first expression is TS or PRJ on the
 *   primary timestamp column and a ts buffer exists (pTSBuf != NULL), the tag column
 *   named by its argument is loaded into pCtx[0].tag and logged.
 *
 * NOTE(review): the result of doGetTagColumnInfoById() is dereferenced without a
 * NULL check; this presumably relies on the argument always naming an existing tag
 * column - confirm against the callers.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQInfo         *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  SExprInfo      *pFirstExpr = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      colId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, colId);

    doSetTagValueInParam(tsdb, pTable, colId, &pCtx[0].tag, pCol->type, pCol->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pCtx[idx].tag, pExpr->type, pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;

  bool tsOrPrj = (pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ);
  if (!tsOrPrj || pRuntimeEnv->pTSBuf == NULL || pFuncMsg->colInfo.colIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      joinColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
  SColumnInfo *pJoinCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinColId);

  doSetTagValueInParam(tsdb, pTable, joinColId, &pCtx[0].tag, pJoinCol->type, pJoinCol->bytes);

  int16_t tagType = pCtx[0].tag.nType;
  if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pFirstExpr->base.arg->argValue.i64, pCtx[0].tag.pz);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pFirstExpr->base.arg->argValue.i64, pCtx[0].tag.i64Key);
  }
}

/*
 * Feed one window result into the merge functions of all output columns.
 *
 * timestamp:  stored as nStartQueryTimestamp of every function context
 * pWindowRes: window result to merge; its page is fetched from the result buffer
 * mergeFlag:  true  - merge into the current output row
 *             false - advance each output buffer by one row (outputBytes), reset the
 *                     result info, re-run the function's init(), then merge
 *
 * For TAG and TAG_DUMMY columns the tag variant of the context is rebuilt from the
 * input element. Finally distMergeFunc() is invoked for every column except
 * TAG_DUMMY.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  tFilePage      *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  // first pass: prepare the context of every output column
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pc = &pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf = pc->aOutputBuf + pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[functionId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);

    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pc->tag);

    int32_t type = pc->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // second pass: run the merge function of every column that produces output
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtx[i]);
    }
  }
}

2695
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2764
/*
 * Debug helper: dump the first numOfRows rows of the intermediate result to stdout.
 *
 * pdata holds one page per output column; row j of column i starts at
 * pdata[i]->data + bytes(i) * j. Binary cells are delegated to printBinaryData(),
 * timestamp/bigint/int/float/double cells are printed directly, cells of any other
 * type are skipped. Every row is terminated by a newline.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      char      *cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, cell, type);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2799 2800 2801
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2802 2803 2804 2805 2806
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2807

2808 2809
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2810

2811 2812
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2813

2814 2815 2816 2817
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2818

2819 2820 2821 2822
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2823

H
hjxilinx 已提交
2824
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2825
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2826
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId);
2827

H
Haojun Liao 已提交
2828
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2829
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2830

H
hjxilinx 已提交
2831
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2832
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2833
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId);
2834

H
Haojun Liao 已提交
2835
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2836
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2837

2838 2839 2840
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2841

2842 2843 2844
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2845
/*
 * Merge the results of table groups, starting at pQInfo->groupIndex, until one
 * group yields at least one result page or all groups have been processed.
 *
 * groupIndex is advanced past every processed group. When all groups are done and
 * every page of the current group result has been consumed, the super table query
 * is flagged as over. The elapsed time is added to summary.firstStageMergeTime.
 *
 * Returns -1 if merging a group failed (not enough disk space to save the data),
 * TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, group);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy rows of the current group result from the disk-based result buffer into the
 * output buffers of the query (pQuery->sdata), at most pQuery->rec.capacity rows.
 *
 * When all pages of the current group have been consumed, the next group is merged
 * first; the function returns without copying if that merge fails or if no group is
 * left (the super table query is then flagged as over).
 *
 * Copying starts at (pageId, rowId) of the group result info and advances that
 * position. Within a page every column is stored contiguously, so a copy that
 * resumes in the middle of a page has to skip the rows that were already returned.
 * Previously the copy always started at row 0 of the page and, for the tail of a
 * page, copied the full page row count, which returned rows twice and could write
 * past the output buffer.
 *
 * pQuery->rec.rows must be 0 on entry (asserted) and holds the number of copied rows
 * on return.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already placed in the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->rowId < pData->num);

    // first row of this page that has not been returned yet
    int32_t startRow = (int32_t)pGroupResInfo->rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // output buffer fills up in the middle of this page, remember where to resume
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->rowId += numOfCopiedRows;
      done = true;
    } else {
      // the rest of this page fits, continue with the next page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pageId += 1;
      pGroupResInfo->rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;
      char *  pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2953
/*
 * Number of result rows held by a window result.
 *
 * The count is taken from the first output column that is not a TS, TAG or TAGPRJ
 * function (those cannot decide the output number; it is decided by the main
 * output) and whose numOfRes is positive. Returns 0 if there is no such column.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool isAuxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);

    if (!isAuxiliary) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }
  }

  return 0;
}

2976
/*
 * Merge the window results of all tables in one group into the group result pages.
 *
 * pGroup holds the STableQueryInfo pointers of the group. Tables without result
 * pages or without window results are ignored.
 *  - no table with results:  returns 0
 *  - exactly one such table: no merge is needed, its pages become the group result
 *  - otherwise: the tables are merged by timestamp with a loser tree; window results
 *    with the same timestamp are merged into one row, and the output buffer is
 *    flushed into the result buffer whenever it is full
 *
 * Returns the number of data pages of the group result, or -1 when flushing the
 * merged rows failed. On out-of-memory or when the query has been killed the
 * function does not return but longjmp()s to pRuntimeEnv->env.
 *
 * All temporary allocations (position list, table list, loser tree, result info,
 * intermediate buffer) are released on every exit path, including the failure and
 * longjmp paths.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    taosTFree(posList);
    taosTFree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  SIDList pageList = NULL;
  int32_t tid = -1;

  // collect the tables that actually have results
  for (int32_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables++] = item;
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    taosTFree(posList);
    taosTFree(pTableList);
    return 0;
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pageId = 0;
    pGroupResInfo->rowId = 0;

    return pGroupResInfo->numOfDataPages;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  int32_t         ret = -1;  // stays -1 unless the merge runs to completion
  int64_t         endt = 0;
  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;
  char *          buf = NULL;

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  buf = calloc(1, pRuntimeEnv->interBufSize);

  if (pTree == NULL || pResultInfo == NULL || buf == NULL) {
    // release whatever has been allocated so far before leaving through longjmp
    taosTFree(pTableList);
    taosTFree(posList);
    taosTFree(pTree);
    taosTFree(pResultInfo);
    taosTFree(buf);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);

      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pTree);
      taosTFree(pResultInfo);
      taosTFree(buf);

      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // the root of the loser tree names the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->win.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window result, skip it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
            goto _end;  // ret is still -1
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto _end;  // ret is still -1
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  ret = pQInfo->groupResInfo.numOfDataPages;

_end:
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);

  taosTFree(pResultInfo);
  taosTFree(buf);

  return ret;
}

H
Haojun Liao 已提交
3160 3161
/*
 * Copy the rows currently held in pQuery->sdata[] into newly requested pages of the
 * disk-based result buffer, at most pResultBuf->numOfRowsPerPage rows per page.
 *
 * Every page is requested for pGroupResInfo->groupId, and pGroupResInfo->numOfDataPages
 * is incremented once for each page that has been filled.
 *
 * @param pRuntimeEnv    runtime environment: supplies the query, the result buffer, the
 *                       per-column contexts and the per-column page offsets
 * @param pGroupResInfo  supplies the group id and receives the number of pages written
 * @return TSDB_CODE_SUCCESS if all rows were copied, TSDB_CODE_QRY_OUT_OF_MEMORY if a
 *         page could not be obtained (pages filled before the failure stay counted)
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;  // output argument of getNewDataBuf
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  int32_t remain = (int32_t) pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t rows = (remain > capacity)? capacity:remain;
    assert(rows > 0);

    // get the output buffer page
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    if (buf == NULL) {
      // NOTE(review): assumes getNewDataBuf reports failure by returning NULL - confirm
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = rows;

    // copy this slice of every output column into its region of the page
    // NOTE(review): the column stride uses pRuntimeEnv->numOfRowsPerPage while the slice size
    // uses pResultBuf->numOfRowsPerPage; presumably both hold the same value - confirm
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      char* output = buf->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage;
      char* src = ((char *) pQuery->sdata[i]->data) + offset * bytes;
      memcpy(output, src, (size_t) rows * bytes);
    }

    offset += rows;
    remain -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output state of every output column: the column buffer is marked
 * empty and the function context is re-bound to the given result info array.
 *
 * @param pQuery       query whose sdata[] column buffers are reset (num = 0)
 * @param pCtx         per-column function contexts, numOfOutput entries
 * @param pResultInfo  per-column result info, numOfOutput entries
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    SQLFunctionCtx *pColCtx = &pCtx[col];

    // the output pointer is parked one row *before* the buffer start
    // NOTE(review): presumably the merge step advances it by outputBytes before writing - confirm
    pColCtx->aOutputBuf = pQuery->sdata[col]->data - pColCtx->outputBytes;
    pColCtx->size = 1;
    pColCtx->startOffset = 0;
    pColCtx->resultInfo = &pResultInfo[col];

    pQuery->sdata[col]->num = 0;
    col += 1;
  }
}

3209 3210 3211 3212
/*
 * Flip the time window and cursor of one table so that it can be scanned in the
 * opposite direction. A NULL pTableQueryInfo is ignored.
 *
 * The caller is expected to have switched pQuery->order.order already, since the
 * step is derived from the current order.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate that win.ekey and lastKey + step are ordered consistently with the scan direction

  // lastKey == win.skey means nothing was produced, so the window end is kept as it is
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  // reverse the window and restart from its (new) start key
  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * For every closed window result, mark which output functions still have work to do
 * in the reverse scan: first/first_dst (when order is ASC) and last/last_dst (when
 * order is DESC) are set to not complete; every other function except TS and TAG is
 * set to complete. TS and TAG columns are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    // windows that are still open are skipped
    if (!getTimeWindowResStatus(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pWindowRes = getWindowResult(pWindowResInfo, w);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pWindowRes->resultInfo[col].complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pWindowRes->resultInfo[col].complete = true;
      }
    }
  }
}

3265 3266
/*
 * Decide, before a reverse scan, which output functions remain active.
 *
 * Group-by-normal-column and interval queries are handled per window result by
 * disableFuncInReverseScanImpl; otherwise the single result info attached to each
 * function context is updated with the same rule.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple (single result) table query
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    if (pCtx->resultInfo == NULL) {
      continue; // resultInfo is NULL, means no data checked in previous scan
    }

    int32_t functId = pQuery->pSelectExpr[col].base.functionId;

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Prepare every table of every table group for the reverse scan: the per-table query
 * info is reversed, and the resulting lastKey is copied into the matching entry of
 * pQInfo->tableGroupInfo.pGroupList.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTableList = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pTableList);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pTableList, k);
      updateTableQueryInfoForReverseScan(pQuery, pTableQueryInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyList, k);
      pKeyInfo->lastKey = pTableQueryInfo->lastKey;

      // both lists are expected to describe the same table at the same position
      assert(pTableQueryInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3316
/* Apply SWITCH_ORDER to the order field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pCtx[col].order);
  }
}

H
Haojun Liao 已提交
3323
/*
 * Allocate the result info array of one result row together with its intermediate
 * buffer in a single zero-initialized allocation: numOfOutput SResultInfo entries
 * followed by interBufSize bytes.
 *
 * On success pageId and rowId of the row are set to -1 and the result info entries are
 * set up by setWindowResultInfo.
 *
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if the allocation fails
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  resInfoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, resInfoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  // no result page/row has been assigned yet
  pResultRow->pageId = -1;
  pResultRow->rowId = -1;

  // the intermediate result buffer starts right behind the SResultInfo array
  char* interBuf = (char*) pResultRow->resultInfo + resInfoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every function context back at the start of its column buffer in
 * pQuery->sdata[], reset and re-attach the runtime result info, zero the column
 * buffer, and finally run initCtxOutputBuf.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo    *pResInfo = &pRuntimeEnv->resultInfo[col];

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(pResInfo);
    pCtx->resultInfo = pResInfo;

    // top/bottom/diff write their timestamps into the output buffer of column 0
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    switch (functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of the function contexts by `output` rows and reset
 * the result info of every context.
 *
 * Only functions flagged IS_OUTER_FORWARD in aAggs[] have aOutputBuf moved. The
 * DIFF function must not be present (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the timestamp column of the output is written by the
     * top/bottom routine itself through ptsOutputBuf, so that pointer has to be
     * forwarded as well.
     *
     * diff function is handled in multi-output function
     */
    bool usesTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (usesTsBuf) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and invoke the function's init
 * callback for each context whose result info is not initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3412
/*
 * Apply the remaining limit offset to the rows currently held in the output buffer.
 *
 * - rec.rows <= offset: all rows are dropped, the offset is reduced by rec.rows, the
 *   output buffers are reset and the QUERY_RESBUF_FULL flag is cleared.
 * - rec.rows >  offset: the first `offset` rows are dropped by moving the remaining
 *   rows to the front of each column buffer; the offset becomes 0.
 *
 * Nothing is done when there are no rows or no offset.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->limit.offset == 0 || pQuery->rec.rows == 0) {
    return;
  }

  // case 1: everything produced so far is skipped
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // case 2: only the leading part is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    int32_t bytes = pCtx->outputBytes;
    char   *base = (char*) pQuery->sdata[col]->data;

    // source and destination may overlap, hence memmove
    memmove(base, base + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pCtx->aOutputBuf = base + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update the query status word. QUERY_NOT_COMPLETED replaces the whole status; any
 * other status is OR-ed in after the QUERY_NOT_COMPLETED bit has been cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Invoke xNextStep for every output function (TS columns excepted) and report whether
 * at least one of them is still not complete, i.e. whether the data blocks have to be
 * scanned again.
 *
 * For group-by-normal-column / interval queries this is done for every closed window
 * result, after binding the contexts to that window; otherwise it is done once on the
 * current contexts. xNextStep is always called for all functions; there is no early
 * exit once an incomplete function has been found.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    scanAgain = false;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pSelectExpr[col].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

      if (!pResInfo->complete) {
        scanAgain = true;
      }
    }

    return scanAgain;
  }

  // for each closed group/window result, check every output column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (!pResult->closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pResult);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pSelectExpr[col].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

      if (!pResInfo->complete) {
        scanAgain = true;
      }
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3508
/*
 * Take a snapshot of the current scan state: query status, current window index,
 * the query window, and the current table's window with its start key replaced by
 * `start`. `start` is also recorded as lastKey.
 *
 * `start` must not be beyond the table's lastKey in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pTableQueryInfo->lastKey);
  } else {
    assert(start >= pTableQueryInfo->lastKey);
  }

  SQueryStatusInfo info = {0};
  info.status      = pQuery->status;
  info.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  info.lastKey     = start;

  TIME_WINDOW_COPY(info.w, pQuery->window);
  TIME_WINDOW_COPY(info.curWindow, pTableQueryInfo->win);

  info.curWindow.skey = start;

  return info;
}

3529 3530 3531 3532
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * The ts buffer cursor is saved into pStatus->cur, the query window is replaced by the
 * reversed pStatus->curWindow, the scan order of the query and of every function
 * context is switched, and a new secondary query handle is opened for the reversed
 * window (an existing one is cleaned up first). If the handle cannot be created,
 * control leaves through longjmp(pRuntimeEnv->env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor, then turn the ts buffer around
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch, the window has to be consistent with the new direction
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3578 3579
/*
 * Undo setEnvBeforeReverseScan: switch the scan order of the query and the function
 * contexts back, restore the saved ts buffer cursor, set the master scan flag, and
 * restore lastKey, status and time window from the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the state captured before the scan
  pTableQueryInfo->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // both the table window and the query window go back to the saved query window
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

H
Haojun Liao 已提交
3600 3601 3602 3603 3604 3605 3606
/*
 * Set lastKey of the only table in the group info to the start key of the query
 * condition window. Exactly one table is expected (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3607
/*
 * Scan the data blocks of the current table, starting at `start`.
 *
 * 1. Master scan, repeated (REPEAT_SCAN) over the same range for as long as
 *    needScanDataBlocksAgain reports an incomplete function. Each repetition opens a
 *    new secondary query handle.
 * 2. If needReverseScan(pQuery) holds, one additional scan in the opposite direction,
 *    after which the environment is restored.
 *
 * Control leaves through longjmp(pRuntimeEnv->env, ...) when a query handle cannot be
 * created or when the query has been killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;

      // the scanned range is only shrunk if the scan actually moved lastKey
      if (qstatus.lastKey != pTableQueryInfo->lastKey) {
        qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    // another pass over the same range is required
    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (!needReverseScan(pQuery)) {
    return;
  }

  setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

  // reverse scan from current position
  qDebug("QInfo:%p start to reverse scan", pQInfo);
  doScanAllDataBlocks(pRuntimeEnv);

  clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
}

H
hjxilinx 已提交
3687
/*
 * Run the xFinalize callback of every output function.
 *
 * For group-by-normal-column / interval queries this is done per closed window result
 * (for group-by-normal-column all windows are closed first), and the number of rows of
 * each window is stored in the window result. Otherwise xFinalize is run once on the
 * current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pWindowRes = &pWindowResInfo->pResult[w];
    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWindowRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Return true if at least one output column is produced by a function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    col += 1;

    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ) {
      continue;
    }

    return true;
  }

  return false;
}

H
Haojun Liao 已提交
3735
/*
 * Initialize the STableQueryInfo located in the caller-provided memory `buf`.
 *
 * The time window is set to `win`, lastKey to win.skey, and the cursor's vgroupIndex
 * to -1. For interval / group-by-normal-column queries the window result info is
 * initialized as well; other queries leave it untouched.
 *
 * @return `buf` as STableQueryInfo*, or NULL if initWindowResInfo fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;

  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  // only interval/groupby queries need the per-table window result info
  bool needWindowRes = (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol);
  if (needWindowRes) {
    int32_t initialSize = 16;
    int32_t initialThreshold = 100;

    int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
    if (code != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3760
/*
 * Release the resources held inside a STableQueryInfo (its tag variant and window
 * result info). The structure itself is not freed here. NULL is ignored.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3772
 * @param pDataBlockInfo
3773
 */
H
Haojun Liao 已提交
3774
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3775
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3776 3777 3778
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3779 3780
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3781 3782 3783 3784

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3785

H
Haojun Liao 已提交
3786 3787 3788
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3789

3790 3791
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3792 3793 3794
  if (pWindowRes == NULL) {
    return;
  }
3795

3796 3797 3798 3799
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
3800
  if (pWindowRes->pageId == -1) {
3801
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3802 3803 3804 3805
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3806

H
Haojun Liao 已提交
3807 3808
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3809 3810 3811 3812
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3813
/*
 * Bind every function context to the given window result: the output buffer is set to
 * the column's position inside the window's result page, the result info is set to the
 * window's result info of that column, and the super table flag is copied from the
 * runtime environment.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pResPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pResPage);

    // top/bottom/diff take their timestamp output from the buffer of column 0
    switch (functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3840 3841
/*
 * Bind every function context to the given window result and initialize the function
 * where needed.
 *
 * The result info of each context is always re-pointed to the window's result info.
 * Columns whose result info is both initialized and complete are otherwise left
 * untouched; for the remaining columns the output buffer is bound, currentStage is
 * reset to 0 and the init callback runs if the result info is not initialized yet.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pResPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo    *pResInfo = &pResult->resultInfo[col];

    pCtx->resultInfo = pResInfo;
    if (pResInfo->initialized && pResInfo->complete) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pResPage);
    pCtx->currentStage = 0;

    int32_t functionId = pCtx->functionId;
    bool needTsBuf = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;     // set super table query flag

    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3874
/*
 * Set the tag values of the given table and, if a ts buffer (pTSBuf) is present,
 * position the ts buffer on the data of this table.
 *
 * On the first call for a table (cur.vgroupIndex == -1) the tag of context 0 is copied
 * into the table query info, the start position for (vgId, tag) is looked up in the ts
 * buffer and the resulting cursor is remembered in the table query info. On later calls
 * the remembered cursor is restored into the ts buffer.
 *
 * @param pQInfo           query info owning the runtime environment
 * @param pTable           table whose tag values are set
 * @param pTableQueryInfo  per-table state holding the tag copy and the ts buffer cursor
 * @return 0 on success; -1 if no ts data exists for the tag/vnode. The previous code
 *         returned `false` here, which is 0 and therefore indistinguishable from success.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;
  bool isStrTag = (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR);

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (elem.vnode < 0) {
      if (isStrTag) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return -1;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  }

  // both branches report the same position information
  if (isStrTag) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3930
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3931 3932
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3933
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3934

3935 3936 3937
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3938
    pTableQueryInfo->win.skey = key;
3939
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3940

3941 3942 3943 3944 3945
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3946

3947 3948 3949 3950 3951 3952
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3953
    STimeWindow     w = TSWINDOW_INITIALIZER;
3954
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3955

H
Haojun Liao 已提交
3956 3957
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3958
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3959
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3960

3961 3962
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3963
        assert(win.ekey == pQuery->window.ekey);
3964
      }
3965

3966
      pWindowResInfo->prevSKey = w.skey;
3967
    }
3968

3969
    pTableQueryInfo->queryRangeSet = 1;
3970
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3971 3972 3973 3974
  }
}

/*
 * Return true if at least one output expression of the query uses a function whose status flags
 * in aAggs contain TSDB_FUNCSTATE_NEED_TS; false otherwise.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    idx += 1;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is required when:
 * 1. the last key of the current table or the end key of the query window falls inside the
 *    [skey, ekey] range of the block, so the precise position must be located, or
 * 2. some output function needs timestamps in any case (see requireTimestamp).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *blockWin = &pDataBlockInfo->window;
  TSKEY        lastKey = pQuery->current->lastKey;

  if (lastKey >= blockWin->skey && lastKey <= blockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= blockWin->skey && pQuery->window.ekey <= blockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3998
/*
 * Copy rows of the closed window results into the output buffers (pQuery->sdata).
 *
 * Copying resumes from pQInfo->groupIndex / pQInfo->groupResInfo.rowId and stops when either all
 * closed windows are consumed or pQuery->rec.capacity rows have been produced. Both cursors are
 * advanced so that a later call continues where this one stopped.
 *
 * @param pQInfo       query handle
 * @param pResultInfo  window results to read from
 * @param orderType    TSDB_ORDER_ASC walks the windows forward, anything else walks them backward
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SGroupResInfo    *pGroupResInfo = &pQInfo->groupResInfo;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  bool    forward = (orderType == TSDB_ORDER_ASC);
  int32_t delta = forward ? 1 : -1;
  int32_t first = forward ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  int32_t copied = 0;

  for (int32_t idx = first; (idx >= 0) && (idx < numOfClosed); idx += delta) {
    SWindowResult *pWin = &pWindows[idx];

    // empty window: move on to the next group
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->rowId = 0;
      continue;
    }

    int32_t rowOffset = pGroupResInfo->rowId;
    int32_t rowsToCopy = pWin->numOfRows - pGroupResInfo->rowId;

    /*
     * If the remaining output space cannot hold all pending rows of this window, copy only what
     * fits and remember the position inside the window; otherwise the window is finished.
     */
    if (rowsToCopy > pQuery->rec.capacity - copied) {
      rowsToCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->rowId += rowsToCopy;
    } else {
      pGroupResInfo->rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWin->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pRuntimeEnv, col, pWin, page);
      memcpy(dst, src + rowOffset * width, width * rowsToCopy);
    }

    copied += rowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;  // output buffer is full
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4075
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4076
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4077

4078
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4079
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4080

4081
  pQuery->rec.rows += numOfResult;
4082

4083
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4084 4085
}

H
Haojun Liao 已提交
4086
/*
 * For non-interval queries, raise numOfRows of every window result in the runtime environment to the
 * largest numOfRes reported by its output columns. Columns produced by TSDB_FUNC_TS, TSDB_FUNC_TAG and
 * TSDB_FUNC_TAGPRJ are not taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pInfo->size; ++w) {
    SWindowResult *pResult = &pInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
      if (!skipped) {
        pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4108
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4109
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4110
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4111
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4112

4113
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4114
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4115

H
Haojun Liao 已提交
4116
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4117
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4118
  } else {
4119
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4120 4121 4122
  }
}

H
Haojun Liao 已提交
4123
/*
 * Tell whether a table query still has results to hand out.
 *
 * Returns false as soon as a positive LIMIT has been reached. Otherwise:
 *  - with fill enabled (and not a point interpolation query): true if the fill info still holds rows,
 *    or if the query is completed and filling up to the end of the query window yields rows;
 *  - without fill: true if the query is completed, it is a group-by-normal-column or interval query,
 *    and the runtime window result list is not empty.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool gapFill = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);

  if (!gapFill) {
    // results are waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // rows not yet returned to the client must be filled first
  int32_t pending = taosNumOfRemainRows(pFillInfo);
  if (pending > 0) {
    return true;
  }

  /*
   * Nothing is pending at this point. While the query is still running, gaps between two result
   * blocks are handled after the next data block has been retrieved, so there is nothing to report.
   *
   * NOTE: for a result block that is not the first one, the gap in front of it is filled. For the
   * FIRST result block, the gap between the start of the query window and the first result row is
   * not filled.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the current result set into a response buffer.
 *
 * Layout written to `data`:
 *   1. for every output column, numOfRows * bytes of column data, column after column;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer in network byte order;
 *   3. one STableIdInfo per entry with uid/tid/key converted to big-endian.
 *
 * The column data may end at any byte offset, so the trailing integer and structures are staged in
 * properly aligned locals and transferred with memcpy instead of being stored through a casted
 * pointer (which would be a misaligned access).
 *
 * After serialization, a completed query is marked QUERY_OVER once no more results can follow.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy from each output column
 * @param data       destination buffer; the caller must provide enough room (not checked here)
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t  numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  uint32_t netNumOfTables = htonl((uint32_t)numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    // build the wire representation in an aligned local, then copy it out byte-wise
    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4200
/*
 * Generate filled result rows into pQuery->sdata and apply the remaining OFFSET to them.
 *
 * Each round produces up to pQuery->rec.capacity rows:
 *  - offset == 0: the rows are returned as they are;
 *  - offset < produced rows: the first `offset` rows are dropped by moving the rest to the front of
 *    every column in pDst, the offset is cleared and the remaining count is returned;
 *  - otherwise: all produced rows are discarded, the offset is reduced, and another round is started
 *    as long as the query still has results; if it has none, 0 is returned.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         column buffers in which the offset shift is performed
 * @param numOfFilled  not used by this function
 * @return number of rows available in the output after fill and offset handling
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t produced = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // the start position according to the offset value has been reached: return immediately
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, produced);
      return produced;
    }

    if (pQuery->limit.offset < produced) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, produced, pQuery->limit.offset, produced - pQuery->limit.offset, 0);

      produced -= (int32_t)pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                produced * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return produced;
    }

    // the whole batch is swallowed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, produced, pQuery->limit.offset, 0,
        pQuery->limit.offset - produced);

    pQuery->limit.offset -= produced;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return 0;
    }
  }
}

4245
/*
 * Finalize and log the cost summary of a query.
 *
 * The memory used by the hash tables (runtime window results, the table query info map, and the
 * window results of every table in every group) is summed into summary.hashSize, the first stage
 * merge time is added to the elapsed time, and two debug lines with the collected figures are emitted.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pSummary = &pRuntimeEnv->summary;

  uint64_t totalHashMem = taosHashGetMemSize(pQInfo->runtimeEnv.windowResInfo.hashList);
  totalHashMem += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);

  int32_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  for(int32_t g = 0; g < numOfGroup; ++g) {
    SArray* tables = GET_TABLEGROUP(pQInfo, g);

    int32_t numOfTables = taosArrayGetSize(tables);
    for(int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo* pTableQueryInfo = taosArrayGetP(tables, t);
      totalHashMem += taosHashGetMemSize(pTableQueryInfo->windowResInfo.hashList);
    }
  }

  pSummary->hashSize = totalHashMem;

  // the merge time is part of the total elapsed time
  pSummary->elapsedTime += pSummary->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->firstStageMergeTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: windowInfo size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pSummary->winInfoSize/1024.0,
      pSummary->numOfTimeWindows, pSummary->tableInfoSize/1024.0, pSummary->hashSize/1024.0);
}

4278 4279
/*
 * Consume the remaining OFFSET inside the given data block.
 *
 * If the offset equals the number of rows in the block, the whole block is skipped: lastKey is moved
 * one step past the block boundary and the offset is cleared. Otherwise the scan position is placed
 * on the first row after the skipped ones, lastKey is set to the timestamp of that row, the offset is
 * cleared and the query functions are applied on the rest of the block.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the current block is ignored completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pTableQueryInfo->lastKey = boundary + step;
    pQuery->limit.offset = 0;
    return;
  }

  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // column 0 is read as the timestamp column; restart from the row at the new position
  TSKEY *tsList = (TSKEY *) pTsCol->pData;
  pTableQueryInfo->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4313

4314 4315 4316 4317 4318
/*
 * Skip data blocks until the OFFSET of the query has been consumed.
 *
 * Nothing is done when there is no positive offset or when the query has filter columns. Blocks that
 * hold fewer rows than the remaining offset are skipped as a whole; the first block that can absorb
 * the rest of the offset is handed to updateOffsetVal() and the scan stops there.
 *
 * Leaves through longjmp on pRuntimeEnv->env when the query has been killed or when terrno is set
 * after the scan.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pTableQueryInfo = pQuery->current;
  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // the remaining offset ends inside this block: find the start position and stop
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is covered by the offset
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY boundary = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pTableQueryInfo->lastKey = boundary;
    pTableQueryInfo->lastKey += step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4352

H
Haojun Liao 已提交
4353
/*
 * Consume the OFFSET of an interval query by skipping time windows instead of rows.
 *
 * *start is first set to the last key of the current table. The start position is NOT forwarded when
 * there is no positive offset, or when the query has filter columns, a timestamp buffer or fill info.
 *
 * Otherwise data blocks are scanned; every time window that ends inside the current block (with
 * respect to the scan direction) reduces the offset by one. Once the offset reaches zero, the query
 * start (*start, pQuery->window.skey, prevSKey) is moved to the next window. If that window begins
 * in the current block, the block is loaded and the functions are applied on it right away.
 *
 * Since holes may exist in the data, interval * offset cannot be used to compute the new start
 * directly; the windows are walked one by one.
 *
 * @return always true; errors reported through terrno leave via longjmp on pRuntimeEnv->env
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableInfo = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  STimeWindow    aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      // descending: re-align on the end key of every block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending: align only once, on the start key of the first block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);
      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window of this block
    STimeWindow curWin = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // a window that ends inside the current block consumes one unit of the offset
      if ((curWin.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (curWin.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = curWin.skey;
      }

      STimeWindow nextWin = curWin;
      GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

      bool offsetDone = (pQuery->limit.offset == 0);
      bool nextInBlock = (nextWin.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
                         (nextWin.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery));

      if (!nextInBlock) {
        if (!offsetDone) {
          break;  // offset is not 0, and next time window begins or ends in the next block.
        }

        // offset consumed and nothing left in this block: restart from the next window
        *start = nextWin.skey;
        pQuery->window.skey = nextWin.skey;
        pWinInfo->prevSKey = nextWin.skey;
        return true;
      }

      /*
       * The next time window still starts in the current data block: load the block and find the
       * start position of the next queried time window; the timestamps are read from column 0.
       * TODO: optimize performance. Not every data block needs to be loaded, only the one in which
       * the first actually required time window resides.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(handle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      nextWin = curWin;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &nextWin, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      TSKEY firstTs = ((TSKEY *)pTsCol->pData)[startPos];

      if (!offsetDone) {
        // more windows to skip: continue from the window found in this block
        pTableInfo->lastKey = firstTs;
        pWinInfo->prevSKey = nextWin.skey;
        curWin = nextWin;
        continue;
      }

      // reset the query start timestamp
      pTableInfo->win.skey = firstTs;
      pQuery->window.skey = firstTs;
      *start = firstTs;

      pWinInfo->prevSKey = nextWin.skey;
      int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
      pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

      return true;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4479 4480
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4481
/*
 * Create the TSDB query handle (pRuntimeEnv->pQueryHandle) for this query.
 *
 * No handle is created for tag-only queries, nor for super table queries that are neither interval
 * queries nor fixed-output queries; TSDB_CODE_SUCCESS is returned right away in these cases.
 *
 * The scan window is the query window, except for an ascending, non-interval, non-group-by,
 * non-fixed-output query on exactly one normal table, where the window of that table is used.
 * For first/last row queries the query window and the window/lastKey of every table are replaced by
 * the window stored in the condition after the handle has been created.
 *
 * @return the value of terrno after the handle creation (reset to TSDB_CODE_SUCCESS beforehand)
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order   = pQuery->order.order,
    .colList = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  bool useTableWindow = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv));

  if (useTableWindow) {
    SArray          *firstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(firstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

    // update the query time window
    pQuery->window = cond.twindow;

    if (pQInfo->tableGroupInfo.numOfTables != 0) {
      // propagate the new window to every table of every group
      size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
      for(int32_t g = 0; g < numOfGroups; ++g) {
        SArray *tables = GET_TABLEGROUP(pQInfo, g);

        size_t numOfTables = taosArrayGetSize(tables);
        for (int32_t t = 0; t < numOfTables; ++t) {
          STableQueryInfo *pTableInfo = taosArrayGetP(tables, t);

          pTableInfo->win = pQuery->window;
          pTableInfo->lastKey = pTableInfo->win.skey;
        }
      }
    } else {
      pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    }
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }

  return terrno;
}

4545 4546 4547
/*
 * Build the fill column descriptors for all output columns of the query.
 *
 * One SFillColInfo is produced per output expression; the column offsets are the running sum of the
 * byte widths of the preceding expressions.
 *
 * @return a calloc'ed array of pQuery->numOfOutput entries, or NULL if the allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t runningOffset = 0;

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo    *pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo *pCol = &pFillCol[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = runningOffset;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    runningOffset += pExpr->bytes;
  }

  return pFillCol;
}

4570
/*
 * Initialize the execution state of a query.
 *
 * Steps, in order:
 *  1. cache query properties (top/bottom, tag output) and apply the scan limitation;
 *  2. create the TSDB query handle;
 *  3. fill in the basic fields of the query info and of the runtime environment;
 *  4. set the traverse order of the timestamp buffer, if one is supplied;
 *  5. set up the runtime environment;
 *  6. create the disk based result buffer and the window result info where the query type needs them;
 *  7. create the fill info when fill is requested (and the query is not a point interpolation query);
 *  8. mark the query as QUERY_NOT_COMPLETED.
 *
 * @return TSDB_CODE_SUCCESS, or the error code of the first step that failed
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t rc = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (rc != TSDB_CODE_SUCCESS) {
    return rc;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // ascending traversal when the query order equals the order stored in the ts buffer
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  rc = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (rc != TSDB_CODE_SUCCESS) {
    return rc;
  }

  int32_t pageSize = DEFAULT_PAGE_SIZE;
  int32_t rowBytes = 0;
  getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowBytes);

  int32_t inMemLimit = 1024*1024*2;  // 2 MB, passed on to the disk based result buffer

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    rc = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowBytes, pageSize, inMemLimit, pQInfo);
    if (rc != TSDB_CODE_SUCCESS) {
      return rc;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t keyType = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {  // keyed by group id
        keyType = TSDB_DATA_TYPE_INT;
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      rc = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, keyType);
      if (rc != TSDB_CODE_SUCCESS) {
        return rc;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowBytes);

    rc = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowBytes, pageSize, inMemLimit, pQInfo);
    if (rc != TSDB_CODE_SUCCESS) {
      return rc;
    }

    int16_t keyType = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      keyType = TSDB_DATA_TYPE_TIMESTAMP;
    }

    rc = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, keyType);
    if (rc != TSDB_CODE_SUCCESS) {
      return rc;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    STimeWindow   aligned = TSWINDOW_INITIALIZER;

    // the alignment routine receives the bounds in increasing order regardless of scan direction
    TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &aligned);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, aligned.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4676
/*
 * Re-arm every output function for the next table: the "complete" flag of each
 * output column's result info is cleared. Columns without a result info are
 * skipped.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703
/*
 * Prepare the execution environment before a data block is processed.
 *
 * - interval query: the interval range is set from the start key of the block,
 *   and the additional (tag / ts-comp related) info is installed when tag
 *   results are required or a ts-comp buffer is attached.
 * - otherwise: the execution context of the table's group is set, using the key
 *   one step past the end key of the block.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4704
/*
 * Walk through all data blocks produced by the active query handle (the primary
 * handle during the master scan, the secondary handle otherwise) and apply the
 * query functions on each block.
 *
 * The scan stops early when a block belongs to a table that is not registered in
 * the table-query-info map, or when loading a block fails. If the query has been
 * killed, or terrno is set once the loop ends, control leaves via longjmp to
 * pRuntimeEnv->env.
 *
 * @return elapsed time of the scan in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // locate the per-table query info by the table id of the current block
    STableQueryInfo **pTableQueryInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (pTableQueryInfo == NULL) {
      break;
    }

    pQuery->current = *pTableQueryInfo;

    // the table window must be consistent with the scan order and lie inside the query window
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert(
          ((*pTableQueryInfo)->win.skey <= (*pTableQueryInfo)->win.ekey) &&
          ((*pTableQueryInfo)->lastKey >= (*pTableQueryInfo)->win.skey) &&
          ((*pTableQueryInfo)->win.skey >= pQuery->window.skey && (*pTableQueryInfo)->win.ekey <= pQuery->window.ekey));
    } else {
      assert(
          ((*pTableQueryInfo)->win.skey >= (*pTableQueryInfo)->win.ekey) &&
          ((*pTableQueryInfo)->lastKey <= (*pTableQueryInfo)->win.skey) &&
          ((*pTableQueryInfo)->win.skey <= pQuery->window.skey && (*pTableQueryInfo)->win.ekey >= pQuery->window.ekey));
    }

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *pTableQueryInfo, &blockInfo);
    }

    uint32_t     loadStatus = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pHandle, &blockInfo, &pStatis,
                                         &pDataBlock, &loadStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (loadStatus == BLK_DATA_DISCARD) {
      // block is skipped entirely: move lastKey one step past the block edge selected by the scan order
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startMs;
}

4779 4780
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4781
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4782

4783
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
4784
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
4785
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
4786

H
Haojun Liao 已提交
4787 4788 4789
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
4790

H
Haojun Liao 已提交
4791
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
4792
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
4793
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
4794

4795
  STsdbQueryCond cond = {
4796
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
4797 4798
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
4799
      .numOfCols = pQuery->numOfCols,
4800
  };
4801

H
hjxilinx 已提交
4802
  // todo refactor
4803
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
4804 4805 4806 4807
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
4808

4809
  taosArrayPush(g1, &tx);
4810
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
4811

4812
  // include only current table
4813 4814 4815 4816
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
4817

H
Haojun Liao 已提交
4818
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
4819 4820
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
4821 4822 4823
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
4824

4825
  if (pRuntimeEnv->pTSBuf != NULL) {
H
Haojun Liao 已提交
4826 4827
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

4828
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
4829
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4830
      // failed to find data with the specified tag value and vnodeId
4831
      if (elem.vnode < 0) {
H
Haojun Liao 已提交
4832 4833 4834 4835 4836 4837
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

4838
        return false;
H
Haojun Liao 已提交
4839 4840
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4841 4842 4843 4844 4845 4846 4847 4848

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
4849 4850
      }
    } else {
H
Haojun Liao 已提交
4851
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4852
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
4853

H
Haojun Liao 已提交
4854
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4855
        // failed to find data with the specified tag value and vnodeId
H
Haojun Liao 已提交
4856
        if (elem1.vnode < 0) {
H
Haojun Liao 已提交
4857 4858 4859 4860 4861
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
4862

H
Haojun Liao 已提交
4863
          return false;
H
Haojun Liao 已提交
4864 4865
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4866 4867 4868 4869 4870
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
4871
        }
H
Haojun Liao 已提交
4872

H
Haojun Liao 已提交
4873 4874
      } else {
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
H
Haojun Liao 已提交
4875
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4876 4877 4878 4879 4880
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
4881
      }
4882 4883
    }
  }
4884

4885
  initCtxOutputBuf(pRuntimeEnv);
4886 4887 4888 4889 4890 4891 4892 4893 4894 4895
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4896
static void sequentialTableProcess(SQInfo *pQInfo) {
4897
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4898
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4899
  setQueryStatus(pQuery, QUERY_COMPLETED);
4900

H
Haojun Liao 已提交
4901
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4902

H
Haojun Liao 已提交
4903
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4904 4905
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4906

4907
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4908
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4909

S
TD-1057  
Shengliang Guan 已提交
4910
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4911
             numOfGroups, group);
H
Haojun Liao 已提交
4912 4913 4914 4915 4916 4917 4918

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4919 4920
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4921 4922 4923
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4924

H
Haojun Liao 已提交
4925 4926 4927 4928 4929 4930 4931
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4932

4933
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4934
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4935
      } else {
H
Haojun Liao 已提交
4936
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4937
      }
B
Bomin Zhang 已提交
4938 4939 4940 4941 4942 4943

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4944

H
Haojun Liao 已提交
4945
      initCtxOutputBuf(pRuntimeEnv);
4946

4947
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4948
      assert(taosArrayGetSize(s) >= 1);
4949

4950
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4951 4952 4953
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4954

dengyihao's avatar
dengyihao 已提交
4955
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4956

H
Haojun Liao 已提交
4957
      // here we simply set the first table as current table
4958 4959 4960
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4961
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4962

H
Haojun Liao 已提交
4963 4964 4965 4966 4967
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4968

H
Haojun Liao 已提交
4969 4970 4971 4972 4973
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4974 4975 4976 4977 4978 4979

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4980
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4981
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4982
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4983

S
TD-1057  
Shengliang Guan 已提交
4984
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4985 4986 4987 4988 4989 4990 4991

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4992 4993
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5006
      // no need to update the lastkey for each table
5007
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
5008

B
Bomin Zhang 已提交
5009 5010
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5011 5012 5013
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5014

5015
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5016 5017
      assert(taosArrayGetSize(s) >= 1);

5018
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5019 5020 5021 5022 5023 5024 5025 5026

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
5027
      taosArrayDestroy(s);
5028 5029 5030 5031 5032
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5033
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
5034 5035 5036

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5037
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes));
5038 5039 5040
        }
      }

5041
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5042 5043 5044 5045 5046 5047 5048
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5049
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5050 5051 5052 5053 5054 5055

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5056 5057 5058
    }
  } else {
    /*
5059
     * 1. super table projection query, 2. ts-comp query
5060 5061 5062
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5063
    if (pQInfo->groupIndex > 0) {
5064
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5065
      pQuery->rec.total += pQuery->rec.rows;
5066

5067
      if (pQuery->rec.rows > 0) {
5068 5069 5070
        return;
      }
    }
5071

5072
    // all data have returned already
5073
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5074 5075
      return;
    }
5076

5077 5078
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5079

H
Haojun Liao 已提交
5080
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5081 5082
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5083

5084
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5085
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5086
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5087
      }
5088

5089
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5090
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5091
        pQInfo->tableIndex++;
5092 5093
        continue;
      }
5094

H
hjxilinx 已提交
5095
      // TODO handle the limit offset problem
5096
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5097 5098
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5099 5100 5101
          continue;
        }
      }
5102

5103
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5104
      skipResults(pRuntimeEnv);
5105

5106
      // the limitation of output result is reached, set the query completed
5107
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5108
        SET_STABLE_QUERY_OVER(pQInfo);
5109 5110
        break;
      }
5111

5112 5113
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5114

5115
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5116 5117 5118 5119 5120 5121
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5122
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5123

H
Haojun Liao 已提交
5124
        STableIdInfo tidInfo = {0};
5125

H
Haojun Liao 已提交
5126 5127 5128
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5129
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5130 5131
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5132
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5133
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5134 5135
          break;
        }
5136

H
Haojun Liao 已提交
5137 5138 5139 5140
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5141
      } else {
5142
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5143 5144
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5145 5146
          continue;
        } else {
5147 5148 5149
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5150 5151 5152
        }
      }
    }
H
Haojun Liao 已提交
5153

5154
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5155 5156
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5157
  }
5158

5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5171
    finalizeQueryResult(pRuntimeEnv);
5172
  }
5173

5174 5175 5176
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5177

5178
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5179 5180
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5181
      pQuery->limit.offset);
5182 5183
}

5184 5185 5186 5187
/*
 * Switch the runtime environment from the master scan to the reverse scan:
 * the scan flag is set to reverse, the query window boundaries are swapped, the
 * query order (and the order of the ts-comp cursor, if any) is flipped, the
 * function contexts are switched, and a secondary query handle covering all
 * table groups is created with the reversed condition.
 *
 * A failure to create the secondary handle leaves via longjmp to pRuntimeEnv->env.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // the condition is built from the already reversed order and window
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer so that it never dangles while the
  // environment is being reconfigured below
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5221 5222 5223 5224
/*
 * Undo the changes applied by doSaveContext once the reverse scan is finished:
 * the query window boundaries are swapped back, the query order and the order of
 * the ts-comp cursor (if any) are flipped back, the function contexts are
 * switched back and the scan flag returns to the master scan.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  // doSaveContext flips the query order as well; flip it back so that the order
  // stays consistent with the restored query window
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pRuntimeEnv);
  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
}

5235 5236 5237
/*
 * Close every time window result after a scan.
 *
 * For an interval query the window results are kept per table, so the windows of
 * each table in each group are closed; otherwise only the window results held by
 * the runtime environment are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {  // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pTableList = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(pTableList);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTableList, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables: master scan, optional reverse scan, then
 * copy of the generated results into the output buffer.
 *
 * When pQInfo->groupIndex is larger than 0 the scans are not executed again;
 * only the next part of the existing results is copied to the output buffer.
 *
 * If the query is killed or pQInfo->code holds an error after a scan, control
 * leaves via longjmp to pRuntimeEnv->env.
 * NOTE(review): the jump value is always TSDB_CODE_TSC_QUERY_CANCELLED, even when
 * pQInfo->code holds a different error - confirm this is intended.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    // the scan has already been carried out, we only need to copy the data into the output buffer
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsedMs = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsedMs);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsedMs = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsedMs);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    // TODO: finalizeQueryResult is not invoked here since it may cause SEGSEV (the memory may not be
    // allocated yet); add a cleanup function to release the resources allocated during query instead
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !isSumAvgRateQuery(pQuery)) {
    // results are held in the window results of the runtime environment
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5334
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5335
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5336

H
hjxilinx 已提交
5337
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5338
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5339 5340
    return;
  }
5341

H
hjxilinx 已提交
5342
  pQuery->current = pTableInfo;  // set current query table info
5343

5344
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5345
  finalizeQueryResult(pRuntimeEnv);
5346

H
Haojun Liao 已提交
5347
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5348
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5349
  }
5350

H
Haojun Liao 已提交
5351
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5352
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5353

5354
  skipResults(pRuntimeEnv);
5355
  limitResults(pRuntimeEnv);
5356 5357
}

H
hjxilinx 已提交
5358
/*
 * Process a multi-output query on a single table: scan, finalize and (when
 * filters are present) apply the offset, repeating until rows are produced
 * or the query is completed.
 *
 * @param pQInfo      query handle that owns the runtime environment
 * @param pTableInfo  per-table query state; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialization of the output buffer is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pEnv);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(pEnv);
  if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
    // offset still pending after skipping whole blocks: nothing left to return
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQuery->current->lastKey);
    finalizeQueryResult(pEnv);

    pQuery->rec.rows = getNumOfResult(pEnv);

    // with filter columns the offset has to be applied to the produced rows
    bool applyOffset = (pQuery->limit.offset > 0) && (pQuery->numOfFilterCols > 0) && (pQuery->rec.rows > 0);
    if (applyOffset) {
      skipResults(pEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // record the table id together with the last key reached by this query
    STableId*    pId = TSDB_TABLEID(pQuery->current->pTable);
    STableIdInfo tidInfo;

    tidInfo.tid = pId->tid;
    tidInfo.uid = pId->uid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5418
/*
 * Scan the current table starting at `start` and finalize the result,
 * repeating until the query status reports QUERY_COMPLETED or QUERY_RESBUF_FULL.
 * When filters (or a ts buffer) are in use and no fill is requested, closed
 * time windows are dropped here to consume the pending offset.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQry = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQry->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool filtered = (pQry->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    if (filtered && pQry->limit.offset > 0 && pQry->fillType == TSDB_FILL_NONE) {
      // maxOutput <= 0, means current query does not generate any results
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // drop as many closed windows as the remaining offset allows
      int32_t toDrop = (int32_t)(MIN(closedWindows, pQry->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, toDrop);
      pQry->limit.offset -= toDrop;
    }
  } while (!Q_STATUS_EQUAL(pQry->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5445
// handle time interval query on table
H
hjxilinx 已提交
5446
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5447 5448
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5449 5450
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5451

H
Haojun Liao 已提交
5452
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5453
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5454

5455
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5456
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5457
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5458 5459 5460 5461
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5462
  while (1) {
H
Haojun Liao 已提交
5463
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5464

H
Haojun Liao 已提交
5465
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5466
      pQInfo->groupIndex = 0;  // always start from 0
5467
      pQuery->rec.rows = 0;
5468
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5469

5470
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5471
    }
5472

5473
    // the offset is handled at prepare stage if no interpolation involved
5474
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5475
      limitResults(pRuntimeEnv);
5476 5477
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5478
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5479
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5480
      numOfFilled = 0;
5481

H
Haojun Liao 已提交
5482
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5483
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5484
        limitResults(pRuntimeEnv);
5485 5486
        break;
      }
5487

5488
      // no result generated yet, continue retrieve data
5489
      pQuery->rec.rows = 0;
5490 5491
    }
  }
5492

5493
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5494
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5495
    pQInfo->groupIndex = 0;
5496
    pQuery->rec.rows = 0;
5497
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5498
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5499 5500 5501
  }
}

5502 5503 5504 5505
/*
 * Entry point for a query on exactly one table. Results left over from a
 * previous round (fill output or buffered window results) are returned first;
 * otherwise the query is dispatched to the interval, fixed-output or
 * multi-output processor and the elapsed time is accumulated in the summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (queryHasRemainResForTableQuery(pEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * There are remaining results that were not returned due to result interpolation,
       * so we stay in this procedure instead of launching the retrieve procedure for the next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    pQInfo->groupIndex = 0;  // always start from 0
    pQuery->rec.rows = 0;

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // no data remains
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray*          pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pTable = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5567
/*
 * Entry point for a super-table query: choose between the multi-table
 * processor and the sequential per-table processor, and accumulate the
 * elapsed time in the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  pQuery->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  // interval queries, and fixed-output queries that are neither point-interpolation
  // nor group-by-normal-column queries, go through the multi-table processor
  bool useMultiTableProc =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && (!pEnv->groupbyNormalCol));

  if (useMultiTableProc) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5588
/*
 * Locate the source column referenced by an expression.
 *
 * @param pQueryMsg  query message providing colList/numOfCols/numOfTags
 * @param pExprMsg   expression whose colInfo is looked up
 * @param pTagCols   tag column descriptors (searched when the column is a tag)
 * @return index into pTagCols (tag column) or pQueryMsg->colList (normal column),
 *         TSDB_TBNAME_COLUMN_INDEX for the table-name pseudo tag, or
 *         TSDB_UD_COLUMN_INDEX for a user specified column. A column that
 *         cannot be found trips assert(0) and yields -1.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t t = 0; t < pQueryMsg->numOfTags; ++t) {
      if (pTagCols[t].colId == pExprMsg->colInfo.colId) {
        return t;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t c = 0; c < pQueryMsg->numOfCols; ++c) {
      if (pQueryMsg->colList[c].colId == pExprMsg->colInfo.colId) {
        return c;
      }
    }
  }

  // the referenced column does not exist in the message
  assert(0);
  return -1;
}

5619 5620 5621
/*
 * Check that the column index resolved for an expression is below the number
 * of columns or below the number of tags carried by the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t idx = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (idx < pQueryMsg->numOfCols) {
    return true;
  }

  return idx < pQueryMsg->numOfTags;
}

5624
/*
 * Sanity-check the scalar counters of a query message.
 *
 * @return true when interval >= 0, numOfTables > 0, numOfGroupCols >= 0 and
 *         0 < numOfOutput <= TSDB_MAX_COLUMNS; otherwise the first violated
 *         rule is logged and false is returned.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    return true;
  }

  return false;
}

/*
 * Validate the number of source columns/tags of a query message.
 *
 * A message without any source column or tag is accepted only when every
 * output expression is a tag projection, or a tid_tag/count applied to the
 * table-name pseudo column.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t total = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || total > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (total != 0) {
    return true;
  }

  // no source column at all: every expression must be able to work without one
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFunc = pExprMsg[i];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed  = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                    (pFunc->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                    (pFunc->functionId == TSDB_FUNC_COUNT && onTbname);
    if (!allowed) {
      return false;
    }
  }

  return true;
}

5670
/*
 * Build the table id list from the query message.
 *
 * Each STableIdInfo record at pMsg is byte-swapped in place and appended to a
 * newly created array stored in *pTableIdList.
 *
 * @return pointer to the first byte after the consumed records
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  for (int32_t left = pQueryMsg->numOfTables; left > 0; --left) {
    STableIdInfo* pInfo = (STableIdInfo *)pMsg;

    pInfo->tid = htonl(pInfo->tid);
    pInfo->uid = htobe64(pInfo->uid);
    pInfo->key = htobe64(pInfo->key);

    taosArrayPush(*pTableIdList, pInfo);

    // advance to the next record
    pMsg += sizeof(STableIdInfo);
  }

  return pMsg;
}
5688

5689
/**
H
hjxilinx 已提交
5690
 * pQueryMsg->head has been converted before this function is called.
5691
 *
H
hjxilinx 已提交
5692
 * @param pQueryMsg
5693 5694 5695 5696
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5697
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5698
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5699 5700
  int32_t code = TSDB_CODE_SUCCESS;

5701 5702 5703 5704
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5705 5706 5707 5708 5709 5710
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5711 5712
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5713

5714 5715
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5716
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5717
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5718 5719

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5720
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5721
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5722 5723 5724
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5725
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5726
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5727
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5728

5729
  // query msg safety check
5730
  if (!validateQueryMsg(pQueryMsg)) {
5731 5732
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5733 5734
  }

H
hjxilinx 已提交
5735 5736
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5737 5738
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5739
    pColInfo->colId = htons(pColInfo->colId);
5740
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5741 5742
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5743

H
hjxilinx 已提交
5744
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5745

H
hjxilinx 已提交
5746
    int32_t numOfFilters = pColInfo->numOfFilters;
5747
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5748
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5749 5750 5751 5752
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5753 5754 5755
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5756
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5757

5758 5759
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5760 5761 5762

      pMsg += sizeof(SColumnFilterInfo);

5763 5764
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5765

5766
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5767 5768 5769 5770 5771
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5772
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5773
        pMsg += (pColFilter->len + 1);
5774
      } else {
5775 5776
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5777 5778
      }

5779 5780
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5781 5782 5783
    }
  }

5784
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5785 5786 5787 5788 5789
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5790
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5791

5792
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5793
    (*pExpr)[i] = pExprMsg;
5794

5795
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5796 5797 5798 5799
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5800

5801
    pMsg += sizeof(SSqlFuncMsg);
5802 5803

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5804
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5805 5806 5807 5808
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5809
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5810 5811 5812 5813 5814
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5815 5816
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5817
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5818 5819
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5820 5821
      }
    } else {
5822
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5823
//        return TSDB_CODE_QRY_INVALID_MSG;
5824
//      }
5825 5826
    }

5827
    pExprMsg = (SSqlFuncMsg *)pMsg;
5828
  }
5829

5830
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5831
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5832
    goto _cleanup;
5833
  }
5834

H
hjxilinx 已提交
5835
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5836

H
hjxilinx 已提交
5837
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5838
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5839 5840 5841 5842
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5843 5844 5845

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5846
      pMsg += sizeof((*groupbyCols)[i].colId);
5847 5848

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5849 5850
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5851
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5852 5853 5854 5855 5856
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5857

H
hjxilinx 已提交
5858 5859
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5860 5861
  }

5862 5863
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5864
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5865 5866

    int64_t *v = (int64_t *)pMsg;
5867
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5868 5869
      v[i] = htobe64(v[i]);
    }
5870

5871
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5872
  }
5873

5874 5875
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5876 5877 5878 5879 5880
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5881 5882
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5883

5884 5885 5886 5887
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5888

5889
      (*tagCols)[i] = *pTagCol;
5890
      pMsg += sizeof(SColumnInfo);
5891
    }
H
hjxilinx 已提交
5892
  }
5893

5894 5895 5896
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5897 5898 5899 5900 5901 5902

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5903 5904 5905
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5906

weixin_48148422's avatar
weixin_48148422 已提交
5907
  if (*pMsg != 0) {
5908
    size_t len = strlen(pMsg) + 1;
5909

5910
    *tbnameCond = malloc(len);
5911 5912 5913 5914 5915
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5916
    strcpy(*tbnameCond, pMsg);
5917
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5918
  }
5919

5920
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5921 5922
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5923
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5924
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5925 5926

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5927 5928

_cleanup:
S
Shengliang Guan 已提交
5929
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5930 5931
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5932 5933 5934 5935
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5936 5937

  return code;
5938 5939
}

H
hjxilinx 已提交
5940
/*
 * Build the expression tree of an arithmetic expression from the binary
 * string carried in its first argument and store it in pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS; the code caught from exprTreeFromBinary when it
 *         raises one; TSDB_CODE_QRY_APP_ERROR when no tree was produced.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  // set once before TRY and never modified afterwards
  char *exprStr = pArithExprInfo->base.arg[0].argValue.pz;

  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, exprStr);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(exprStr, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, exprStr, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot == NULL) {
    qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, exprStr);
    return TSDB_CODE_QRY_APP_ERROR;
  }

  pArithExprInfo->pExpr = pRoot;
  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
5961
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5962 5963
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5964
  int32_t code = TSDB_CODE_SUCCESS;
5965

H
Haojun Liao 已提交
5966
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5967
  if (pExprs == NULL) {
5968
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5969 5970 5971 5972 5973
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5974
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5975
    pExprs[i].base = *pExprMsg[i];
5976
    pExprs[i].bytes = 0;
5977 5978 5979 5980

    int16_t type = 0;
    int16_t bytes = 0;

5981
    // parse the arithmetic expression
5982
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5983
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5984

5985
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5986
        taosTFree(pExprs);
5987
        return code;
5988 5989
      }

5990
      type  = TSDB_DATA_TYPE_DOUBLE;
5991
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5992
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5993
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5994
      type = s.type;
H
Haojun Liao 已提交
5995
      bytes = s.bytes;
5996 5997
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5998 5999
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6000 6001
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6002 6003 6004 6005 6006

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6007
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
6008
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6009

dengyihao's avatar
dengyihao 已提交
6010
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6011 6012 6013 6014
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6015
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6016

H
Haojun Liao 已提交
6017 6018 6019
        type  = s.type;
        bytes = s.bytes;
      }
6020 6021
    }

S
TD-1057  
Shengliang Guan 已提交
6022
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6023
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6024
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
6025
      taosTFree(pExprs);
6026
      return TSDB_CODE_QRY_INVALID_MSG;
6027 6028
    }

6029
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6030
      tagLen += pExprs[i].bytes;
6031
    }
6032
    assert(isValidDataType(pExprs[i].type));
6033 6034 6035
  }

  // TODO refactor
6036
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6037 6038
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6039

6040
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6041
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6042 6043 6044 6045 6046 6047 6048 6049 6050
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6051 6052 6053
    }
  }

6054
  *pExprInfo = pExprs;
6055 6056 6057
  return TSDB_CODE_SUCCESS;
}

6058
/*
 * Create the group-by descriptor of a query.
 *
 * @param pQueryMsg  query message providing numOfGroupCols/orderType/orderByIdx
 * @param pColIndex  array of numOfGroupCols group-by columns; its items are copied
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 *                   fails; left untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by column or
 *         an allocation failed (tell the two apart through *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // without this check the pushes below would dereference a NULL array
    taosTFree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6082
/**
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo for every column in
 * pQuery->colList that carries at least one filter, with a filter callback
 * resolved for each individual condition.
 *
 * @param pQInfo  query handle, used only as an identifier in log messages
 * @param pQuery  query whose colList is read and whose numOfFilterCols/pFilterInfo are written
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure, or
 *         TSDB_CODE_QRY_INVALID_MSG when a filter condition or column type is not usable
 *
 * On failure the partially built filter array is left attached to pQuery.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the count consistent with the (missing) array so that cleanup code
    // iterating over numOfFilterCols never touches a NULL pFilterInfo
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // both bounds present: pick the range callback matching the bound inclusiveness
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }

        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

6175
/**
 * Resolve colInfo.colIndex of every output expression to the position of the
 * matching column id in pQuery->colList (normal columns) or pQuery->tagColList
 * (tag columns). Arithmetic expressions and user-defined constant columns are
 * left untouched.
 *
 * @param pQuery  query whose pSelectExpr entries are updated in place
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the handle itself before dereferencing it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is allowed to be absent from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6212 6213
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6214 6215 6216
/**
 * Choose the output buffer capacity (in rows) and the flush threshold for a query.
 *
 * Projection queries get as many rows as fit in 1.5 MB, but never fewer than 8192;
 * every other query uses a fixed 4096-row buffer. The threshold is 85% of the capacity.
 *
 * @param pQuery  query whose rec.capacity and rec.threshold are written; rowSize is read
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t minMsgBytes    = 1024 * (1024 + 512);  // bytes
  const int32_t minMsgRows     = 8192;
  const float   thresholdRatio = 0.85f;

  // non-projection queries use the smaller default output buffer
  int32_t rows = 4096;

  if (isProjQuery(pQuery)) {
    rows = minMsgBytes / pQuery->rowSize;
    if (rows < minMsgRows) {
      rows = minMsgRows;
    }
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * thresholdRatio);
}

6233 6234
/**
 * Allocate and populate a query handle (SQInfo) together with its SQuery.
 *
 * Ownership: pGroupbyExpr, pExprs and pTagCols are taken over by this function
 * in every case. On success they are attached to the new query; on failure they
 * are released here (directly, or through freeQInfo once attached), so the caller
 * must not free them afterwards. The content of *pTableGroupInfo is copied into
 * the handle and released together with it.
 *
 * @param pQueryMsg        decoded query message
 * @param pGroupbyExpr     group-by descriptor, may be NULL
 * @param pExprs           array of pQueryMsg->numOfOutput output expressions
 * @param pTableGroupInfo  tables taking part in the query, organised in groups
 * @param pTagCols         tag column descriptors
 * @param stableQuery      forwarded to changeExecuteScanOrder
 * @return the new handle, or NULL on failure
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // colList holds SColumnInfo elements, so size the array by that type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the query keeps its own copy of the filters; the message's copy is released by the caller
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    assert(pExprs[col].interBytes >= pExprs[col].bytes);

    // allocate additional memory for interResults that are usually larger then final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);

    // the group list is pushed into below; bail out rather than push into a NULL array
    if (pQInfo->tableqinfoGroupInfo.pGroupList == NULL) {
      goto _cleanup;
    }
  }

  int tableIndex = 0;

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  // one contiguous slab holds the STableQueryInfo of every table
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  int32_t index = 0;

  for (int32_t i = 0; i < (int32_t)numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for (int32_t j = 0; j < (int32_t)s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      // carve the next slot out of the pre-allocated slab
      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);

      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // no handle exists yet, so the table group was never copied into one
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // the inputs were not attached to a query yet: release them directly
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  taosTFree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  taosTFree(pExprs);

_cleanup:
  // releases everything that has been attached to the handle so far
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6414
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6415 6416 6417 6418
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6419

H
hjxilinx 已提交
6420 6421 6422 6423
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6424
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6425 6426 6427
  return (sig == (uint64_t)pQInfo);
}

6428
/**
 * Finish the initialisation of a freshly created query handle.
 *
 * The query is marked QUERY_COMPLETED right away (and TSDB_CODE_SUCCESS is
 * returned) when the time window is empty for the scan order or when no table
 * takes part in the query. Otherwise the optional timestamp buffer carried by
 * the message is opened and doInitQInfo is run.
 *
 * @param pQueryMsg  query message; the ts* fields describe the optional timestamp block
 * @param tsdb       storage handle, used to read the timestamp precision
 * @param vgId       vnode group id, forwarded to the helpers
 * @param pQInfo     handle to initialise; it is freed here when doInitQInfo fails
 * @param isSTable   forwarded to doInitQInfo
 * @return TSDB_CODE_SUCCESS or the error code reported by doInitQInfo
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // Create the timestamp buffer only after the early-exit checks above: those
  // paths never hand the buffer to anyone, so creating it first would leak it.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6472
/**
 * Release an array of column filters, including the buffer referenced by pz
 * of every entry whose filterstr flag is set.
 *
 * @param pFilter       filter array, may be NULL
 * @param numOfFilters  number of entries in pFilter; nothing is freed when it is 0
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (numOfFilters != 0 && pFilter != NULL) {
    int32_t idx = 0;
    while (idx < numOfFilters) {
      SColumnFilterInfo *entry = &pFilter[idx];
      if (entry->filterstr) {
        free((void*)(entry->pz));
      }
      ++idx;
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6486 6487
/**
 * Destroy every per-table query info held in the group list, then the group
 * list itself and the lookup map, and reset the container to its empty state.
 *
 * @param pTableqinfoGroupInfo  container to clear; the struct itself is not freed
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *groups = pTableqinfoGroupInfo->pGroupList;

  if (groups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(groups);

    int32_t g = 0;
    while (g < groupCount) {
      SArray *members = taosArrayGetP(groups, g);
      size_t memberCount = taosArrayGetSize(members);

      int32_t m = 0;
      while (m < memberCount) {
        STableQueryInfo* entry = taosArrayGetP(members, m);
        destroyTableQueryInfoImpl(entry);
        ++m;
      }

      taosArrayDestroy(members);
      ++g;
    }
  }

  taosArrayDestroy(pTableqinfoGroupInfo->pGroupList);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  // leave the container reusable and safe against a second call
  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
hjxilinx 已提交
6510 6511 6512 6513
/**
 * Release a query handle and everything attached to it: the runtime
 * environment, the SQuery with its result buffers, filters, expressions and
 * column lists, the per-table query info, and finally the handle itself.
 *
 * The call is a no-op when pQInfo is NULL or its signature does not match
 * (see isValidQInfo). It is also used on half-initialised handles, so every
 * member is treated as possibly missing.
 *
 * @param pQInfo  handle to destroy, may be NULL
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        taosTFree(pQuery->sdata[col]);
      }
      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // numOfFilterCols may already be counted while the array allocation failed,
    // so never walk the filter array without checking it first
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          taosTFree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo *pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  taosTFree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle so that a stale pointer fails isValidQInfo
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6585
/**
 * Compute the number of payload bytes of the current result set.
 *
 * For a ts-comp query with at least one row, the result is the file whose path
 * is stored in sdata[0]->data: its size is returned and also written back to
 * *numOfRows (0 is returned when the file cannot be stat'ed). For every other
 * query the size is rowSize * (*numOfRows).
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  in: number of rows; out: file size for ts-comp queries
 * @return size of the result in bytes
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6606

H
hjxilinx 已提交
6607 6608 6609
/**
 * Copy the current batch of results into the response buffer and update the
 * running totals and the query status.
 *
 * For ts-comp queries the result lives in a temporary file whose path is stored
 * in sdata[0]->data: the whole file is read into data and the file is removed.
 * For all other queries the rows are copied by doCopyQueryResultToMsg.
 *
 * @param pQInfo  query handle
 * @param data    destination buffer; the caller must have sized it for the result
 * @return always TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the results signed: lseek/read report failure as -1, which must not
      // be mistaken for a huge size
      int64_t s = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, s);
      if (s < 0) {
        qError("QInfo:%p failed to get size of ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else if (lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t sz = (int64_t)read(fd, data, (uint32_t)s);
        if (sz < s) {  // todo handle error; also covers a failed read (-1)
          assert(0);
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6659 6660 6661 6662 6663 6664 6665
/*
 * Bookkeeping object for query handles: a cache holding the handles plus an id,
 * a closed flag and a mutex.
 * NOTE(review): how the fields are used is not visible in this part of the
 * file; the per-field notes below are assumptions to confirm against the users.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the owning vnode group - confirm
  bool            closed;         // presumably set once the manager is shut down - confirm
  pthread_mutex_t lock;           // presumably guards the fields above - confirm
} SQueryMgmt;

6666
/**
 * Create a query handle from a query message.
 *
 * The message is decoded, the output and group-by expressions are built, the
 * set of tables is resolved (single table, super table by tag condition, or an
 * explicit id list), the query buffer quota is checked, and finally the handle
 * is created and initialised.
 *
 * @param tsdb       storage handle, must not be NULL
 * @param vgId       vnode group id
 * @param pQueryMsg  query message, must not be NULL; the filters attached to its
 *                   column list are released before returning
 * @param pQInfo     out: the new handle on success, NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    // the table group has not been handed to a query handle yet, release it here
    tsdbDestroyTableGroup(&tableGroupInfo);
    goto _over;
  }

  // createQInfoImpl takes over pGroupbyExpr, pExprs, pTagColumnInfo and the table
  // group in every case (it releases them itself on failure), so drop our references
  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // still owned here only when we failed before createQInfoImpl: release the
  // expression trees as well as the array
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }
    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6797
/**
 * Log the cost summary of a finished query and release its handle.
 * Handles that are NULL or fail the signature check are ignored.
 *
 * @param qHandle  query handle to destroy
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* handle = (SQInfo*) qHandle;

  if (isValidQInfo(handle)) {
    qDebug("QInfo:%p query completed", handle);
    queryCostStatis(handle);   // print the query cost summary
    freeQInfo(handle);
  }
}

6808 6809 6810 6811 6812 6813 6814 6815
/**
 * Mark the query result as ready, give up ownership of the handle and signal
 * the ready semaphore.
 *
 * @param pQInfo  query handle currently owned by the calling thread
 * @return true when a response context was registered on the handle at the time
 *         the result became ready, false otherwise
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // The owner must be cleared inside the critical section: once the handle is
  // put into a task, another thread may run ahead of the current one.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6827
/**
 * Execute one round of the query bound to the given handle.
 *
 * The calling thread first claims the handle by atomically swapping its
 * thread id into pQInfo->owner. If another thread already owns the handle,
 * the code is set to TSDB_CODE_QRY_IN_EXEC and false is returned without
 * running anything.
 *
 * In every other case the function ends through doBuildResCheck() and
 * returns its result. This covers a killed query, an empty table set, an
 * error delivered through longjmp to runtimeEnv.env, and normal completion.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t self = taosGetPthreadId();

  // claim the handle; a non-zero previous value means somebody else runs it
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // Errors raised while executing jump back here with a non-zero code, which
  // is recorded on the handle so it can be reported to the client.
  int32_t jmpCode = setjmp(pQInfo->runtimeEnv.env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  // dispatch: tag-only query, super-table query, or plain table query
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pResult = pEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pResult->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pResult->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pResult->rec.rows, pResult->rec.total + pResult->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6883
/**
 * Wait until the result of the current execution round is available.
 *
 * @param qinfo        query handle
 * @param buildRes     out: set to false on entry and to true once the
 *                     `ready` semaphore has been acquired
 * @param pRspContext  unused; it was only consumed by a previously disabled
 *                     code path that stored it in pQInfo->rspContext
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 *         pQInfo->code (returned immediately, without waiting, when the
 *         query has been killed).
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  (void)pRspContext;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // block until doBuildResCheck() posts the semaphore for this round
  tsem_wait(&pQInfo->ready);
  *buildRes = true;

  return pQInfo->code;
}
6926

6927
/**
 * Build the retrieve response message for the current result set.
 *
 * @param qinfo         query handle
 * @param pRsp          out: response buffer obtained from rpcMallocCont()
 * @param contLen       out: total length of the response buffer
 * @param continueExec  out: false when the query is killed or over (the
 *                      response is then flagged as completed), true when
 *                      more results remain
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the buffer cannot be allocated,
 *         otherwise pQInfo->code.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload: result data + one int32_t + one STableIdInfo per arrTableIdInfo entry
  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  // todo: handle allocation failure properly; currently the error code is
  // returned to the caller but no response body can be produced
  SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  *pRsp = rsp;
  if (rsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query
  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    rsp->offset = 0;
  } else {
    rsp->offset = htobe64(pQuery->limit.offset);
  }
  rsp->useconds  = htobe64(pRuntimeEnv->summary.elapsedTime);
  rsp->precision = htons(pQuery->precision);

  // nothing to dump (no rows, or failed query): mark the query as over
  if (pQuery->rec.rows <= 0 || pQInfo->code != TSDB_CODE_SUCCESS) {
    setQueryStatus(pQuery, QUERY_OVER);
  } else {
    doDumpQueryResult(pQInfo, rsp->data);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    *continueExec = false;
    rsp->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6980

6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991
/**
 * Tell whether the query behind the handle has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise
 *         1 when the query is killed or its status has QUERY_OVER set, and
 *         0 when it has not.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQInfo->runtimeEnv.pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6992
/**
 * Mark a query as killed and wait until no thread is executing it.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_SUCCESS once pQInfo->owner has dropped to 0.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Poll until the executing thread has stopped: the owner of the handle is
  // cleared as soon as the query execution ends.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }
    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025
/**
 * Copy one tag value into the result buffer, or write the type's NULL
 * representation when no value is available.
 *
 * @param output  destination inside the result buffer
 * @param val     source value, or NULL
 * @param type    TSDB data type; BINARY and NCHAR are handled as var-length data
 * @param bytes   number of bytes copied for all other types
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarType) {
    // var-length values carry their own total length
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7026 7027 7028
/**
 * Produce the result of a query that only touches tags / table names.
 *
 * Depending on the function id of the first select expression:
 *  - TSDB_FUNC_TID_TAG: emit one var-length record per table containing an
 *    int16_t zero, the table uid, tid, the vgId and the tag value/table name.
 *  - TSDB_FUNC_COUNT:   emit the number of tables ("count(tbname)").
 *  - otherwise:         emit the requested tag/tbname columns per table,
 *    honouring limit and offset.
 *
 * pQInfo->tableIndex is advanced as tables are consumed, pQuery->rec.rows is
 * set to the number of rows produced and the status becomes QUERY_COMPLETED.
 * Nothing is done when there is no table group.
 *
 * The fixed-size fields of the TID_TAG record are written with memcpy: the
 * record starts at an arbitrary byte offset inside the result buffer, so
 * storing through casted int64_t/int32_t pointers would be a misaligned access.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;

    // prefer the type/size declared in the tag column list, if the column is there
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // leading int16_t field, always written as 0
      int16_t zero = 0;
      memcpy(output, &zero, sizeof(zero));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // at most rec.capacity rows per round, fewer if the limit is smaller
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7159
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7160 7161 7162 7163
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7164
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7165 7166
}

H
Haojun Liao 已提交
7167
/**
 * Reserve query buffer for `numOfTables` tables from tsQueryBufferSize.
 *
 * - tsQueryBufferSize < 0: no accounting, always succeeds.
 * - tsQueryBufferSize > 0: atomically subtract the estimated requirement,
 *   retrying when the compare-and-swap loses against a concurrent update.
 * - tsQueryBufferSize == 0: query processing is disabled.
 *
 * @return TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  if (tsQueryBufferSize > 0) {
    for (;;) {
      int64_t avail = tsQueryBufferSize;
      int64_t left  = avail - required;

      if (left < 0) {
        return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
      }

      // commit only if nobody changed the budget since it was sampled
      if (atomic_val_compare_exchange_64(&tsQueryBufferSize, avail, left) == avail) {
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
}

H
Haojun Liao 已提交
7190
/**
 * Give back the buffer budget estimated for `numOfTables` tables.
 *
 * Nothing is done when accounting is switched off (tsQueryBufferSize < 0).
 * A value of exactly 0 must still be refunded: checkForQueryBuf() can drain
 * the budget to 0, and skipping the refund in that state would leave the
 * budget at 0 permanently.
 *
 * NOTE(review): assumes this is only called to undo a successful
 * checkForQueryBuf() — confirm against callers.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize < 0) {
    return;
  }

  int64_t t = getQuerySupportBufSize(numOfTables);

  // restore the amount that was reserved for this query
  atomic_add_fetch_64(&tsQueryBufferSize, t);
}

7201 7202 7203 7204 7205 7206 7207
/**
 * Return the response context currently stored on the query handle
 * (pQInfo->rspContext). The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* pContext = pInfo->rspContext;
  return pContext;
}

7208 7209 7210 7211 7212 7213 7214
/**
 * Free callback passed to taosCacheInit() for the qhandle cache: `qhandle`
 * points at a slot holding the query handle. The query is killed first and
 * then destroyed. A NULL slot or a NULL handle is ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle != NULL && *ppHandle != NULL) {
    qKillQuery(*ppHandle);
    qDestroyQueryInfo(*ppHandle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7219
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7220 7221 7222 7223

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7224
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7225 7226 7227 7228
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7229

S
TD-1530  
Shengliang Guan 已提交
7230
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7231 7232 7233 7234
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7235 7236

  qDebug("vgId:%d, open querymgmt success", vgId);
7237
  return pQueryMgmt;
7238 7239
}

H
Haojun Liao 已提交
7240
/**
 * Callback handed to taosCacheRefresh(): `handle` points at a slot holding a
 * query handle, which is killed through qKillQuery(). A NULL slot or a NULL
 * handle is ignored, in line with freeqinfoFn().
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL || *fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7246 7247 7248 7249 7250 7251 7252
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7253
//  pthread_mutex_lock(&pQueryMgmt->lock);
7254
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7255
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7256

H
Haojun Liao 已提交
7257
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274
}

/**
 * Destroy a query management object that has already been marked closed:
 * the handle cache is cleaned up, the lock destroyed and the object freed.
 * A NULL argument is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  // remember the id for the final log line, the object is gone by then
  const int32_t vgId = pQueryMgmt->vgId;

  assert(pQueryMgmt->closed);

  // detach the pool from the manager before tearing it down
  SCacheObj* pPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  taosTFree(pQueryMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

7280
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7281
  if (pMgmt == NULL) {
7282
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7283 7284 7285
    return NULL;
  }

7286
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7287

7288 7289
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7290
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7291
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7292 7293 7294
    return NULL;
  }

H
Haojun Liao 已提交
7295
//  pthread_mutex_lock(&pQueryMgmt->lock);
7296
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7297
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7298
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7299
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7300 7301
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7302 7303
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7304
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7305 7306 7307 7308 7309

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7310
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7311 7312
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7313 7314 7315 7316 7317 7318 7319
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7320 7321 7322
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7323 7324
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7325
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7326
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7327 7328 7329 7330 7331 7332
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7333
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7334 7335 7336 7337 7338
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7339
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7340 7341 7342
  return 0;
}

7343