qExecutor.c 245.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30

H
Haojun Liao 已提交
31
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise the program is
 * forced to load the primary column explicitly.
 */
37
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
38 39
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

40
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
41
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
42
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
43
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
44

H
Haojun Liao 已提交
45
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
46

47
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
48
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
49

H
Haojun Liao 已提交
50 51
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
52 53 54 55 56
/*
 * Copy the skey/ekey pair of the time window _src into _dst.
 *
 * Both arguments are expanded twice, so they must be free of side effects. They are
 * parenthesized so that expressions such as *pWin can be passed directly.
 * The expansion deliberately does NOT end with a semicolon: the caller supplies it, which
 * keeps the macro usable as the single statement of an un-braced if/else.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)

57
/*
 * Query execution status flags. Every value occupies its own bit, so several of them can
 * be combined in one status word (e.g. QUERY_COMPLETED | QUERY_RESBUF_FULL).
 */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 1u << 0,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists in group-by clause and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 1u << 1,

  /*
   * The query is over:
   * 1. used in one-row result query processing, e.g., count/sum/first/last/avg, etc.
   * 2. also set once all data within the queried time window have been handled
   */
  QUERY_COMPLETED = 1u << 2,

  /*
   * Set when the result has not been completely returned to the client; usually used in
   * case of interval query with interpolation option.
   */
  QUERY_OVER = 1u << 3,
};
77 78

/*
 * Result codes of a timestamp-join comparison.
 * NOTE(review): meaning inferred from the names only; the users are outside this chunk.
 */
enum {
  TS_JOIN_TS_EQUAL = 0,     // numbering starts at 0 and increases by one per entry
  TS_JOIN_TS_NOT_EQUALS,    // == 1
  TS_JOIN_TAG_NOT_EQUALS,   // == 2
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
/*
 * Fault-injection replacements for malloc/calloc/realloc (disabled by default).
 * Each wrapper draws a value from rand() and returns NULL for part of the value range
 * instead of calling the real allocator; the #defines at the end redirect the allocator
 * names used in the rest of this file to the wrappers.
 */

/* malloc that fails when the random draw is a multiple of 1000. */
static UNUSED_FUNC void *u_malloc (size_t size) {
  uint32_t draw = rand();
  return (draw % 1000 == 0) ? NULL : malloc(size);
}

/* calloc that fails when the random draw is a multiple of 1000. */
static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t draw = rand();
  return (draw % 1000 == 0) ? NULL : calloc(num, size);
}

/* realloc that fails when (draw % 5) is 0 or 1. */
static UNUSED_FUNC void* u_realloc(void* p, size_t size) {
  uint32_t draw = rand();
  return (draw % 5 <= 1) ? NULL : realloc(p, size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
126

127
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
128 129 130
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

131
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
132
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
133

134
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
135

136 137
/*
 * Move the time window *tw to the next window in scan direction.
 *
 * For every interval unit other than 'n' and 'y' the start key is shifted by the sliding
 * value (multiplied by the direction factor) and the end key is set to
 * start + interval - 1.
 * For 'n' and 'y' the start key is broken down with localtime_r(), the month counter is
 * advanced by the interval (multiplied by 12 for 'y') and converted back with mktime();
 * the end key is one tick before the start of the window that follows.
 *
 * @param pQuery  supplies order, interval settings and time precision
 * @param tw      in: current window; out: next window
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  const int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  const bool    byMonth = (pQuery->interval.intervalUnit == 'n');
  const bool    byYear = (pQuery->interval.intervalUnit == 'y');

  if (!byMonth && !byYear) {
    tw->skey += pQuery->interval.sliding * direction;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  const bool microPrecision = (pQuery->precision == TSDB_TIME_PRECISION_MICRO);

  // scale the start key down by 1000 (and once more for microsecond precision) for localtime_r
  int64_t key = tw->skey / 1000;
  if (microPrecision) {
    key /= 1000;
  }

  // window length expressed in months
  int64_t months = pQuery->interval.interval;
  if (byYear) {
    months *= 12;
  }

  struct tm tm;
  time_t    t = (time_t)key;
  localtime_r(&t, &tm);

  // start of the next window
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + months * direction);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->skey = mktime(&tm) * 1000L;

  // start of the window after the next one; the end key is derived from it below
  mon = (int)(mon + months);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = mktime(&tm) * 1000L;

  if (microPrecision) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }

  tw->ekey -= 1;
}

#define GET_NEXT_TIMEWINDOW(_q, tw) getNextTimeWindow((_q), (tw))
H
Haojun Liao 已提交
174

175 176
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
177

H
hjxilinx 已提交
178
// todo move to utility
179
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
180

H
hjxilinx 已提交
181
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
182
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
183 184
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
185

186
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
187
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
188

189
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
190
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
191 192
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
193
static void buildTagQueryResult(SQInfo *pQInfo);
194

195
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
196
static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
197 198
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
199

200
/*
 * Check whether the row at position elemPos passes the column filters of the query.
 *
 * A row qualifies when, for every filtered column, at least one of the filters attached
 * to that column accepts the value (OR within a column, AND across columns).
 * NULL values are only accepted by the isNull_filter; non-NULL values are accepted by the
 * notNull_filter without further evaluation and never by the isNull_filter.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row index into the data of each filtered column
 * @return true if the row passes all column filters
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    // a column without any filter element can never qualify a row
    if (pFilterInfo->numOfFilters <= 0) {
      return false;
    }

    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;

    // the NULL state depends on the element only, not on the filter: evaluate it once per column
    bool isnull = isNull(pElem, pFilterInfo->info.type);

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      if (isnull) {
        if (pFilterElem->fp == isNull_filter) {
          qualified = true;
          break;
        }

        continue;  // no other filter is applied to a NULL value
      }

      if (pFilterElem->fp == notNull_filter) {
        qualified = true;
        break;
      }

      if (pFilterElem->fp == isNull_filter) {
        continue;
      }

      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest number of results produced by any output column of the query.
 *
 * When the query has a main output (hasMainOutput()), the ts, tag and tagprj functions are
 * ignored, since they cannot decide the number of output rows by themselves.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  const bool skipAuxiliary = hasMainOutput(pQuery);

  int64_t numOfRows = 0;
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
    if (skipAuxiliary && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pInfo == NULL) {
      continue;
    }

    if (numOfRows < pInfo->numOfRes) {
      numOfRows = pInfo->numOfRes;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

268 269 270 271 272
/*
 * The number of results needs to be updated because the offset value has been updated.
 *
 * Sets numOfRes of every output column to the given value, except for the ts, tag, tagprj
 * and ts_dummy functions. Each updated column must currently hold more than numOfRes
 * results (asserted).
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[idx];
    int16_t         fid = pFuncCtx->functionId;

    bool untouched = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ) ||
                     (fid == TSDB_FUNC_TS_DUMMY);
    if (untouched) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(pFuncCtx);
    assert(pInfo->numOfRes > numOfRes);
    pInfo->numOfRes = numOfRes;
  }
}

H
Haojun Liao 已提交
288
/* Map a group index to a result id: 20000000 plus 10000 per group. */
static UNUSED_FUNC int32_t getGroupResultId(int32_t groupIndex) {
  return 20000000 + (groupIndex * 10000);
}

/*
 * Tell whether the group-by clause contains a normal (non-tag) column.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return true if at least one group-by column is flagged as a normal column
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column used in the group-by clause.
 *
 * The column id of that group-by column is looked up in pQuery->colList; when no normal
 * group-by column exists, or its id is not part of colList, TSDB_DATA_TYPE_NULL is returned.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 stands for "no normal group-by column found"
  int32_t groupColId = -2;
  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  for (int32_t idx = 0; idx < pQuery->numOfCols; ++idx) {
    if (pQuery->colList[idx].colId == groupColId) {
      return pQuery->colList[idx].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * Tell whether the query combines at least one selectivity function with tag/ts dummy
 * output columns (TSDB_FUNC_TAG_DUMMY or TSDB_FUNC_TS_DUMMY).
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  return hasTags && (numOfSelectivity > 0);
}

360 361 362 363 364 365 366 367 368 369 370
/* A projection query consists solely of TSDB_FUNC_PRJ and TSDB_FUNC_TAGPRJ output columns. */
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    projection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);
    if (!projection) {
      return false;
    }

    idx += 1;
  }

  return true;
}

371
/* True when the first output column of the query is the TSDB_FUNC_TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
372

373 374 375
/*
 * Apply the LIMIT clause to the rows of the current result batch.
 *
 * When a positive limit is set and the rows returned so far plus the current batch exceed
 * it, the batch is truncated to the remaining quota and the query is marked
 * QUERY_COMPLETED.
 *
 * @return true if the batch was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  if (pQuery->limit.limit <= 0) {  // no limit specified
    return false;
  }

  if (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {  // still within the limit
    return false;
  }

  // keep only the rows that fit into the remaining quota
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any output column of the query is a TOP or BOTTOM function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      return true;
    }

    idx += 1;
  }

  return false;
}

H
Haojun Liao 已提交
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
/*
 * Tell whether the query result contains tag values.
 *
 * This is the case for a query whose single output is the ts_comp function (the ts_comp
 * column requires the tag value for the join filter), or when any output column refers to
 * a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  if (pQuery->numOfOutput == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // look for a tag value, by which the results are aggregated
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[col].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

423 424 425 426 427 428 429 430
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
431
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
432
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
433 434
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
435 436
  } else {
    *pColStatis = NULL;
437
  }
438

H
Haojun Liao 已提交
439
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
440 441 442
    return false;
  }

443 444 445
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
446

447 448 449 450
  return true;
}

/*
 * Locate the window result that belongs to the given key, creating it if necessary.
 *
 * The key (pData, bytes) is looked up in pWindowResInfo->hashList. On a hit the stored
 * slot becomes the current index. On a miss a new slot is appended - but only during the
 * master scan; otherwise NULL is returned. When the result array is full it is enlarged
 * by 25% (capacity > 10000) or 50%, and by at least one slot so that tiny capacities
 * (0 or 1) still grow.
 *
 * Errors are reported through longjmp(pRuntimeEnv->env, ...):
 *   TSDB_CODE_QRY_OUT_OF_MEMORY       - realloc or createQueryResultInfo failed
 *   TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW - more than MAX_INTERVAL_TIME_WINDOW windows exist
 *
 * @return the window result of the current index, or NULL if the key is unknown and the
 *         scan is not the master scan
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // (int64_t)(1 * 1.5) == 1 and 0 * 1.5 == 0: make sure there is room for the new slot
      if (newCap <= pWindowResInfo->capacity) {
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, (size_t)(newCap * sizeof(SWindowResult)));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SWindowResult *)t;

      // account for the memory only after it has actually been obtained
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);
      pRuntimeEnv->summary.numOfTimeWindows += (newCap - pWindowResInfo->capacity);

      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        int32_t ret = createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
        if (ret != TSDB_CODE_SUCCESS) {
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  // too many time window in query
  if (pWindowResInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The candidate window is the window of the current result slot or, when no slot is active
 * yet (curIndex == -1), the window starting at the stored prevSKey. If ts lies outside of
 * the candidate, the window is recomputed: for the 'n'/'y' units by truncating ts to the
 * interval, otherwise by moving the start key in multiples of the sliding value until the
 * window contains ts.
 *
 * Windows are inclusive on both ends, i.e. ekey is the last tick that belongs to the window
 * (start of the next window minus one).
 *
 * @param pWindowResInfo  window result set of the current table
 * @param ts              timestamp that must be covered by the returned window
 * @param pQuery          supplies interval settings, precision, order and query range
 * @return the active window; for ascending queries ekey is capped at the query end key
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      // taosTimeAdd() yields the start of the following window, the inclusive end is one tick earlier
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  } else {
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
    SWindowResult* pWindowRes = getWindowResult(pWindowResInfo, slot);
    w = pWindowRes->win;
  }

  if (w.skey > ts || w.ekey < ts) {
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else {
      int64_t st = w.skey;

      // window starts after ts: step back by whole sliding units
      if (st > ts) {
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      // window ends before ts: step forward by whole sliding units
      int64_t et = st + pQuery->interval.interval - 1;
      if (et < ts) {
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      }

      w.skey = st;
      w.ekey = w.skey + pQuery->interval.interval - 1;
    }
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  return w;
}

/*
 * Assign a row in the disk-based result buffer to a window result that has none yet.
 *
 * The row is taken from the last page registered for sid; when there is no page, or the
 * last page already holds numOfRowsPerPage rows, a new page is requested.
 *
 * @param pWindowRes        window result; nothing is done if pos.pageId != -1
 * @param pResultBuf        disk-based result buffer
 * @param sid               id under which the pages are grouped in the buffer
 * @param numOfRowsPerPage  number of rows one page can hold
 * @return 0 on success (or if a position was already assigned), -1 if no page is available
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {  // position already assigned
    return 0;
  }

  // in the first scan, new space needed for results
  tFilePage *pPage = NULL;
  int32_t    pageId = -1;

  SIDList pageList = getDataBufPagesIdList(pResultBuf, sid);
  if (taosArrayGetSize(pageList) != 0) {
    SPageInfo* pLast = getLastPageInfo(pageList);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);

      pPage = getNewDataBuf(pResultBuf, sid, &pageId);

      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // take the next free row of the page; pos.pageId is still -1 at this point (checked on entry)
  pWindowRes->pos.pageId = pageId;
  pWindowRes->pos.rowId = (int32_t)(pPage->num++);

  assert(pWindowRes->pos.pageId >= 0);
  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
604
                                       STimeWindow *win, bool masterscan, bool* newWind) {
605 606
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
607

608 609
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
610
  if (pWindowRes == NULL) {
611 612 613
    *newWind = false;

    return masterscan? -1:0;
614
  }
615

616
  *newWind = true;
H
Haojun Liao 已提交
617

618 619 620
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
621
    if (ret != TSDB_CODE_SUCCESS) {
622 623 624
      return -1;
    }
  }
625

626
  // set time window for current result
627
  pWindowRes->win = (*win);
628

H
Haojun Liao 已提交
629
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
630 631 632
  return TSDB_CODE_SUCCESS;
}

633
/* Return the closed flag of the window result stored in the given slot (0 <= slot < size). */
static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
  return pRes->closed;
}

H
Haojun Liao 已提交
638
/*
 * Count the rows of a data block, starting at position pos and walking in scan direction,
 * that belong to a window ending at ekey.
 *
 * Ascending order searches rows [pos, numOfRows), descending order searches rows [0, pos].
 * The row found by searchFn is included in the count only when its timestamp equals ekey.
 * NOTE(review): the exact contract of searchFn (which neighbour it returns when ekey is
 * absent) is not visible here - confirm against its implementations.
 *
 * @return number of rows to step forward; asserted to be > 0
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order != TSDB_ORDER_ASC) {
    int32_t hit = searchFn((char *)pData, pos + 1, ekey, order);
    if (hit >= 0) {
      steps = (pos - hit) + ((pData[hit] == ekey) ? 1 : 0);
    }
  } else {
    // the search runs on the sub-array that starts at pos, so the hit is relative to pos
    int32_t hit = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (hit >= 0) {
      steps = hit + ((pData[hit + pos] == ekey) ? 1 : 0);
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
669
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
670
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
671
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
672
    return pWindowResInfo->size;
673
  }
674

675
  // no qualified results exist, abort check
676
  int32_t numOfClosed = 0;
677

678
  if (pWindowResInfo->size == 0) {
679
    return pWindowResInfo->size;
680
  }
681

682
  // query completed
H
hjxilinx 已提交
683 684
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
685
    closeAllTimeWindow(pWindowResInfo);
686

687 688 689 690
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
691
    int64_t skey = TSKEY_INITIAL_VAL;
692

693 694
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
695
      if (pResult->closed) {
696
        numOfClosed += 1;
697 698
        continue;
      }
699

700
      TSKEY ekey = pResult->win.ekey;
701
      if ((ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
702
          (pResult->win.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
703 704
        closeTimeWindow(pWindowResInfo, i);
      } else {
705
        skey = pResult->win.skey;
706 707 708
        break;
      }
    }
709

710
    // all windows are closed, set the last one to be the skey
711
    if (skey == TSKEY_INITIAL_VAL) {
712 713 714 715 716
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
717

718
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].win.skey;
719

720 721
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
722
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
723
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
724

725
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
726
    } else {
727
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
728
             numOfClosed);
729 730
    }
  }
731

732 733 734 735 736
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
737

738
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
739
  return numOfClosed;
740 741 742
}

/*
 * Count the rows of the data block, starting at startPos and walking in scan direction,
 * that fall into the time window ending at ekey.
 *
 * If the window ends inside the block the count is determined by getForwardStepsInBlock();
 * otherwise all remaining rows of the block (in scan direction) belong to the window.
 * With updateLastKey set, lastKey of the current table (pQuery->current) is moved one step
 * past the last counted timestamp, or past the block border when the rest of the block is
 * consumed.
 *
 * @param startPos  first row to consider, 0 <= startPos < pDataBlockInfo->rows
 * @return number of rows in the window; asserted to be > 0
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* pCurrent = pQuery->current;
  const bool       ascending = QUERY_IS_ASC_QUERY(pQuery);

  // does the window end inside this block?
  const bool endsInBlock = ascending ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  int32_t numOfRows = -1;
  if (endsInBlock) {
    numOfRows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    if (updateLastKey) {  // update the last key
      int32_t lastRow = ascending ? (startPos + (numOfRows - 1)) : (startPos - (numOfRows - 1));
      pCurrent->lastKey = pPrimaryColumn[lastRow] + step;
    }
  } else {
    // the remaining rows of the block all belong to the window
    numOfRows = ascending ? (pDataBlockInfo->rows - startPos) : (startPos + 1);
    if (updateLastKey) {
      TSKEY border = ascending ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
      pCurrent->lastKey = border + step;
    }
  }

  assert(numOfRows > 0);
  return numOfRows;
}

H
Haojun Liao 已提交
782 783
/*
 * Apply every output function of the query to a run of rows of the current data block.
 *
 * The functions are only executed during the master scan or when the window is closed.
 * For each function context the start timestamp, the number of rows (forwardStep) and the
 * start offset are set; selectivity functions additionally get the matching slice of the
 * timestamp column.
 *
 * The pre-aggregated block statistics are only valid when the whole block is processed, so
 * preAggVals.isSet is switched off temporarily when forwardStep < numOfTotal. Each context
 * gets its OWN previous flag value back afterwards, so a context that had no statistics is
 * never left claiming that it has some.
 *
 * @param closed       whether the target window is already closed
 * @param pWin         time window the rows belong to
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsCol        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset,
                                      int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    // remember the flag of this very context, it is restored after the function call
    bool hasPreAgg = pCtx[k].preAggVals.isSet;

    pCtx[k].nStartQueryTimestamp = pWin->skey;
    pCtx[k].size = forwardStep;
    // for descending scans the rows end at offset, so the run starts forwardStep - 1 rows earlier
    pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
      pCtx[k].preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
      aAggs[functionId].xFunction(&pCtx[k]);
    }

    // restore it
    pCtx[k].preAggVals.isSet = hasPreAgg;
  }
}

816
/**
 * Feed one row of the current data block to every output function on behalf of one
 * time window.
 *
 * Nothing happens unless this is the master scan or closed is true.
 *
 * @param pRuntimeEnv  runtime environment providing the query, the function contexts and the scan flag
 * @param closed       status flag of the target time window, supplied by the caller
 * @param pWin         target time window; its skey is stored as nStartQueryTimestamp of each context
 * @param offset       position of the row, forwarded unchanged to each row-wise function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, bool closed, STimeWindow *pWin, int32_t offset) {
  if (!IS_MASTER_SCAN(pRuntimeEnv) && !closed) {
    return;
  }

  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pFuncCtx = &pCtxList[idx];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pFuncCtx, funcId)) {
      continue;
    }

    aAggs[funcId].xFunctionF(pFuncCtx, offset);
  }
}

H
Haojun Liao 已提交
832 833
/**
 * Move *pNext to the following time window and locate the row of the current data block
 * at which that window starts.
 *
 * If the row found lies beyond the new window (the window holds no data), *pNext is moved
 * further so that it covers that row.
 *
 * @param pRuntimeEnv     runtime environment providing the query
 * @param pNext           in: current window; out: next window that may contain rows of this block
 * @param pDataBlockInfo  row count and time range of the current block
 * @param primaryKeys     timestamp column of the block; may be NULL when the column data is not loaded
 * @param searchFn        search routine used to locate the start key in primaryKeys
 * @param prevPosition    row position handed in by the caller for the tumbling-window shortcut;
 *                        -1 disables the shortcut
 * @return                start position inside the block, or -1 if no usable position exists
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    asc = QUERY_IS_ASC_QUERY(pQuery);

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  // next time window is not in current block
  if ((asc && pNext->skey > pDataBlockInfo->window.ekey) || (!asc && pNext->ekey < pDataBlockInfo->window.skey)) {
    return -1;
  }

  // the search key is the leading edge of the window, limited by the start of the query range
  TSKEY startKey = asc ? pNext->skey : pNext->ekey;
  if ((asc && startKey < pQuery->window.skey) || (!asc && startKey > pQuery->window.skey)) {
    startKey = pQuery->window.skey;
  }

  int32_t startPos = 0;
  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling time window query, a special case of sliding time window query:
    // the next window starts right after the given position
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (primaryKeys != NULL) {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  } else if (asc && startKey <= pDataBlockInfo->window.skey) {
    // no timestamps available, but the key is not after the first row of the block
    startPos = 0;
  } else if (!asc && startKey >= pDataBlockInfo->window.ekey) {
    // no timestamps available, but the key is not before the last row of the block
    startPos = pDataBlockInfo->rows - 1;
  } else {
    // the key falls strictly inside the block and cannot be located without the timestamps
    return -1;
  }

  // never hand out (or index with) a position outside of the block
  if (startPos < 0 || startPos >= pDataBlockInfo->rows) {
    return -1;
  }

  // without the timestamps the empty-window check below is impossible
  if (primaryKeys == NULL) {
    return startPos;
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  TSKEY next = primaryKeys[startPos];
  bool  emptyWindow = asc ? (next > pNext->ekey) : (next < pNext->skey);
  if (!emptyWindow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    // units 'n' and 'y' are re-aligned through the time helpers instead of plain arithmetic
    pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (asc) {
    // advance by the smallest multiple of the sliding step that makes the window reach the row
    pNext->ekey += ((next - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    pNext->skey -= ((pNext->skey - next + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
893
/**
 * Limit the trailing edge of a time window by the end of the query range.
 *
 * Ascending query:  the smaller of pWindow->ekey and pQuery->window.ekey.
 * Descending query: the larger of pWindow->skey and pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
910 911
/**
 * Find the data buffer of the column with the given column id.
 *
 * todo binary search (the lookup is a linear scan at present)
 *
 * @param pDataBlock  array of SColumnInfoData
 * @param colId       column id to look for
 * @return            pData of the first matching column, NULL if no column matches
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
925
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
926 927 928
  if (pDataBlock == NULL) {
    return NULL;
  }
929

H
Haojun Liao 已提交
930
  char *dataBlock = NULL;
H
Haojun Liao 已提交
931
  SQuery *pQuery = pRuntimeEnv->pQuery;
932

933
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
934
  if (functionId == TSDB_FUNC_ARITHM) {
935
    sas->pArithExpr = &pQuery->pSelectExpr[col];
936

937 938 939 940
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
941

H
Haojun Liao 已提交
942 943 944 945
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

946
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
947
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
948
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
949
      SColumnInfo *pColMsg = &pQuery->colList[i];
950

951 952 953 954 955 956 957 958
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
959

960
      assert(dataBlock != NULL);
961
      sas->data[i] = dataBlock;  // start from the offset
962
    }
963

964
  } else {  // other type of query function
965
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
966
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
967 968 969 970 971
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
972 973
    } else {
      dataBlock = NULL;
974 975
    }
  }
976

977 978 979 980
  return dataBlock;
}

/**
H
Haojun Liao 已提交
981
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
982 983
 * @param pRuntimeEnv
 * @param forwardStep
984
 * @param tsCols
985 986 987 988 989
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
990
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
991 992
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
993
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
994 995
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

996 997
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
998
  if (pDataBlock != NULL) {
999
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
1000
    tsCols = (TSKEY *)(pColInfo->pData);
1001
  }
1002

1003
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1004 1005 1006
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1007

H
Haojun Liao 已提交
1008
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1009
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1010
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1011
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1012
  }
1013

1014
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1015
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1016
    TSKEY ts = TSKEY_INITIAL_VAL;
1017

H
Haojun Liao 已提交
1018 1019 1020 1021 1022 1023 1024 1025
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
1026
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
1027 1028
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
1029
      taosTFree(sasArray);
H
hjxilinx 已提交
1030
      return;
1031
    }
1032

H
Haojun Liao 已提交
1033 1034 1035
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1036
    if (hasTimeWindow) {
H
Haojun Liao 已提交
1037
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
1038
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
1039

1040
      bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
H
Haojun Liao 已提交
1041
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1042
    }
1043

1044 1045
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
1046

1047
    while (1) {
H
Haojun Liao 已提交
1048 1049
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1050 1051 1052
      if (startPos < 0) {
        break;
      }
1053

1054
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1055
      hasTimeWindow = false;
H
Haojun Liao 已提交
1056 1057
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1058 1059
        break;
      }
1060

1061 1062 1063 1064 1065
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1066
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1067

1068 1069
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1070
    }
1071

1072 1073 1074 1075 1076 1077 1078
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1079
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1080
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1081 1082 1083 1084 1085
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1086

1087 1088 1089 1090
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1091

S
Shengliang Guan 已提交
1092
    taosTFree(sasArray[i].data);
1093
  }
1094

S
Shengliang Guan 已提交
1095
  taosTFree(sasArray);
1096 1097 1098 1099 1100 1101
}

/**
 * Select the output buffer of the group identified by one value of the group-by column.
 *
 * @param pRuntimeEnv  runtime environment; its env is the longjmp target for fatal errors
 * @param pData        pointer to the group-by value of the current row
 * @param type         data type of the value
 * @param bytes        width of the value, used as key length for fixed-width types
 * @return             TSDB_CODE_SUCCESS on success; -1 if the value is null, no window result
 *                     could be obtained or no result buffer could be added.
 *                     Does not return (longjmp) for float/double values and on allocation failure.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // key of the group: the value itself, or its payload for variable-length types
  char* d = pData;
  int16_t len = bytes;
  if (isVarType) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float/binary/nchar columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true);
  if (pWindowRes == NULL) {
    return -1;
  }

  if (isVarType) {
    // NOTE(review): a new key is allocated on every call; whether a key stored by an earlier
    // call for the same group is released elsewhere is not visible here - confirm there is no leak
    pWindowRes->key = malloc(varDataTLen(pData));
    if (pWindowRes->key == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    varDataCopy(pWindowRes->key, pData);
  } else {
    // integer-like values are recorded as a degenerate window [v, v]
    int64_t v = -1;
    switch(type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:  v = GET_INT8_VAL(pData);  break;
      case TSDB_DATA_TYPE_SMALLINT: v = GET_INT16_VAL(pData); break;
      case TSDB_DATA_TYPE_INT:      v = GET_INT32_VAL(pData); break;
      case TSDB_DATA_TYPE_BIGINT:   v = GET_INT64_VAL(pData); break;
      default: break;  // any other type keeps -1
    }

    pWindowRes->win.skey = v;
    pWindowRes->win.ekey = v;
  }

  assert(pRuntimeEnv->windowResInfo.interval == 0);

  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1156
/**
 * Locate the data of the group-by column inside the current data block.
 *
 * Tag columns of the group-by clause are skipped. For every other group-by column the type
 * and width are taken from pQuery->colList, then the block is searched for its column id.
 *
 * @param pQuery      query descriptor providing the group-by expression and the column list
 * @param type        out: data type of the group-by column examined last
 * @param bytes       out: width of the group-by column examined last
 * @param pDataBlock  array of SColumnInfoData
 * @return            pData of the first matching column in the block, NULL if none matches
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupby = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupby->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupby->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // position of the column within the column list of the query
    int32_t wantedId = pGroupCol->colId;
    int16_t listPos = -1;

    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pQuery->colList[c].colId != wantedId) {
        continue;
      }

      listPos = c;
      break;
    }

    assert(listPos >= 0 && listPos < pQuery->numOfCols);

    *type = pQuery->colList[listPos].type;
    *bytes = pQuery->colList[listPos].bytes;

    /*
     *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numInBlock = (int32_t)taosArrayGetSize(pDataBlock);

    for (int32_t b = 0; b < numInBlock; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/**
 * Compare one row with the current element of the timestamp buffer.
 *
 * @param pRuntimeEnv  runtime environment providing the query, pTSBuf and the function contexts
 * @param offset       position of the row inside the input buffer of the first context
 * @return             TS_JOIN_TAG_NOT_EQUALS if the tag of the first context differs from the element tag,
 *                     TS_JOIN_TS_NOT_EQUALS if the row has not reached the element timestamp yet,
 *                     TS_JOIN_TS_EQUAL otherwise
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key != elem.ts) {
    // "behind" means the row comes before the buffered timestamp in scan direction
    bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
    if (behind) {
      return TS_JOIN_TS_NOT_EQUALS;
    }

    // the row must never be ahead of the buffered timestamp
    assert(false);
  }

  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether an output function has to be invoked for the data at hand.
 *
 * @param pRuntimeEnv  runtime environment providing the query and the scan flag
 * @param pCtx         context of the function
 * @param functionId   id of the function
 * @return             true if the function must be executed
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // a completed result and the dummy functions never take further input
  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (GET_RES_INFO(pCtx)->complete || isDummy) {
    return false;
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;

  // first/first_dst run on ascending queries only
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  // last/last_dst run only if their first parameter equals the order of the query
  if (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // every remaining function is skipped during the reverse (supplementary) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1263 1264
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
1265
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1266
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1267

1268
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1269
  STableQueryInfo* item = pQuery->current;
H
Haojun Liao 已提交
1270 1271 1272 1273

  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
H
Haojun Liao 已提交
1274 1275
  bool    groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

1276
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1277 1278 1279
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1280

1281 1282
  int16_t type = 0;
  int16_t bytes = 0;
1283

1284
  char *groupbyColumnData = NULL;
H
Haojun Liao 已提交
1285
  if (groupbyColumnValue) {
1286
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
1287
  }
1288

H
Haojun Liao 已提交
1289
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1290
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1291
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1292
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1293
  }
1294

1295 1296
  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
1297
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
H
hjxilinx 已提交
1298 1299
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
1300
  }
1301

1302
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1303

1304 1305 1306
  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
1307
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
1308 1309
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }
1310

1311
  int32_t j = 0;
H
hjxilinx 已提交
1312
  int32_t offset = -1;
1313

1314
  for (j = 0; j < pDataBlockInfo->rows; ++j) {
H
hjxilinx 已提交
1315
    offset = GET_COL_DATA_POS(pQuery, j, step);
1316

1317 1318 1319 1320 1321 1322 1323 1324 1325 1326
    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }
1327

1328
    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
1329 1330
      continue;
    }
1331

1332
    // interval window query, decide the time window according to the primary timestamp
H
Haojun Liao 已提交
1333
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1334
      int64_t     ts = tsCols[offset];
1335
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1336

1337 1338
      bool hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);
1339 1340 1341
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }
1342

1343 1344 1345 1346
      if (!hasTimeWindow) {
        continue;
      }

1347 1348
      bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset);
1349

1350 1351
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;
1352

1353
      while (1) {
H
Haojun Liao 已提交
1354
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);
1355
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
H
Haojun Liao 已提交
1356
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
1357 1358
          break;
        }
1359

1360 1361 1362
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }
1363

1364
        // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
1365
        hasTimeWindow = false;
1366
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
1367 1368
          break;
        }
1369

1370
        if (hasTimeWindow) {
1371 1372
          closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset);
1373
        }
1374
      }
1375

1376 1377 1378
      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
H
Haojun Liao 已提交
1379
      if (groupbyColumnValue) {
H
hjxilinx 已提交
1380
        char *val = groupbyColumnData + bytes * offset;
1381

H
hjxilinx 已提交
1382
        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
1383 1384 1385 1386
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }
1387

1388
      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1389
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1390 1391 1392 1393 1394
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }
1395

1396 1397 1398
    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
H
hjxilinx 已提交
1399
        setQueryStatus(pQuery, QUERY_COMPLETED);
1400 1401 1402 1403
        break;
      }
    }
  }
H
Haojun Liao 已提交
1404 1405 1406 1407 1408 1409 1410 1411

  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    item->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

H
Haojun Liao 已提交
1412 1413 1414
  if (pRuntimeEnv->pTSBuf != NULL) {
    item->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  }
H
Haojun Liao 已提交
1415

1416 1417 1418 1419 1420
  // todo refactor: extract method
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1421

S
Shengliang Guan 已提交
1422
    taosTFree(sasArray[i].data);
1423
  }
1424

1425 1426 1427 1428
  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1429
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1430
  SQuery *pQuery = pRuntimeEnv->pQuery;
1431

H
hjxilinx 已提交
1432 1433
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
1434

H
Haojun Liao 已提交
1435
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1436
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1437
  } else {
1438
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1439
  }
1440

1441
  // update the lastkey of current table
1442
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1443
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1444

1445
  // interval query with limit applied
1446
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1447
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1448 1449
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1450
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1451

1452 1453 1454 1455
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1456

1457 1458 1459
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1460

1461 1462 1463
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1464 1465 1466 1467 1468

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1469
    }
1470
  }
1471

1472
  return numOfRes;
1473 1474
}

H
Haojun Liao 已提交
1475
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1476
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1477

1478 1479
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
1480

1481
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1482
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1483
  pCtx->aInputElemBuf = inputData;
1484

1485
  if (tpField != NULL) {
H
Haojun Liao 已提交
1486
    pCtx->preAggVals.isSet  = true;
1487 1488
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1489 1490 1491
  } else {
    pCtx->preAggVals.isSet = false;
  }
1492

H
Haojun Liao 已提交
1493 1494
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1495 1496
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1497

H
Haojun Liao 已提交
1498
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1499 1500
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1501

1502 1503
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1504
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1505
  }
1506

1507 1508 1509 1510 1511
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1512
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1513
    /*
H
Haojun Liao 已提交
1514
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1515 1516 1517 1518 1519 1520 1521 1522 1523 1524
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1525

1526 1527
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1528 1529 1530 1531 1532 1533
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1534 1535
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1536
    pInterpInfo->type = (int8_t)pQuery->fillType;
1537 1538
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1539

1540 1541 1542 1543
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1544 1545 1546
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1547 1548
      }
    }
H
Haojun Liao 已提交
1549 1550 1551
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1552
  }
1553

1554 1555 1556 1557 1558 1559
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1560
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1561 1562 1563
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1564
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1565 1566 1567 1568 1569 1570
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1571
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1572 1573
  SQuery* pQuery = pRuntimeEnv->pQuery;

1574
  if (isSelectivityWithTagsQuery(pQuery)) {
1575
    int32_t num = 0;
1576
    int16_t tagLen = 0;
1577

1578
    SQLFunctionCtx *p = NULL;
1579
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1580 1581 1582
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1583

1584
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1585
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1586

1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1600 1601 1602 1603 1604
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
1605
      taosTFree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1606
    }
1607
  }
H
Haojun Liao 已提交
1608 1609

  return TSDB_CODE_SUCCESS;
1610 1611
}

H
Haojun Liao 已提交
1612 1613
/*
 * Hand each output column its slice of the shared intermediate-result buffer.
 * Column i receives pSelectExpr[i].interBytes bytes; the slices are laid out
 * back to back starting at buf.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char *cursor = buf;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t width = pQuery->pSelectExpr[col].interBytes;

    setResultInfoBuf(pResultInfo + col, width, isStableQuery, cursor);
    cursor += width;
  }
}

1622
/*
 * Allocate and initialise the runtime environment of a query: one
 * SQLFunctionCtx per output column plus a single allocation that holds the
 * SResultInfo array followed by the intermediate-result buffer.
 *
 * For every output column the input type/width is resolved (table-name
 * pseudo column, tag column, user-defined constant column or normal column),
 * the function parameters are copied into tVariant values, and the output
 * offset inside a result row is accumulated in pRuntimeEnv->offset[].
 *
 * order: scan order stored in param[2] of top/bottom/diff contexts.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY after releasing
 * everything this function allocated (including the parameter variants).
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the "require null" marker is consumed here and stripped from the flag
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      // user-defined constant column: its type/width travel in the first argument
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions rely on the timestamp being the first output column
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffer starts right behind the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Destroy the parameter variants before dropping pCtx: once pCtx is NULL,
  // teardownQueryRuntimeEnv() no longer visits them. The contexts come from
  // calloc, so numOfParams is 0 for entries that were never initialised.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release the resources held by the runtime environment: the time-window
 * info, the function contexts (parameter variants, tag variant, tag context
 * list), the result-info allocation, the fill info, the result buffer, both
 * tsdb query handles and the ts buffer.
 *
 * A runtime environment without an attached query object is left untouched.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  qDebug("QInfo:%p teardown runtime env", (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv));
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *ctx = pRuntimeEnv->pCtx + col;

      int32_t k = 0;
      while (k < ctx->numOfParams) {
        tVariantDestroy(&ctx->param[k]);
        ++k;
      }

      tVariantDestroy(&ctx->tag);
      taosTFree(ctx->tagInfo.pTagCtxList);
    }

    // resultInfo is only released together with the contexts
    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1769
// True when the code field of the given query info equals TSDB_CODE_TSC_QUERY_CANCELLED.
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1770

H
Haojun Liao 已提交
1771
// Mark the query as cancelled by storing TSDB_CODE_TSC_QUERY_CANCELLED in its code field.
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1772

H
Haojun Liao 已提交
1773 1774 1775
/*
 * Decide whether the query is treated as a fixed-output query.
 *
 * Interval queries never are. Top/bottom queries and group-by-normal-column
 * queries always are. Otherwise the query qualifies as soon as one output
 * expression (timestamp helper columns aside) is not flagged as multi-output.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note: top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *expr = &pQuery->pSelectExpr[i].base;

    // a leading one-parameter projection of the primary timestamp column is
    // skipped (the original comment attributes this to the ts_comp function)
    bool leadingTsProjection = (i == 0) && (expr->functionId == TSDB_FUNC_PRJ) && (expr->numOfParams == 1) &&
                               (expr->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    if (leadingTsProjection) {
      continue;
    }

    bool timestampHelper = (expr->functionId == TSDB_FUNC_TS) || (expr->functionId == TSDB_FUNC_TS_DUMMY);
    if (timestampHelper) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[expr->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1805
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1806
static bool isPointInterpoQuery(SQuery *pQuery) {
1807
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1808
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1809
    if (functionID == TSDB_FUNC_INTERP) {
1810 1811 1812
      return true;
    }
  }
1813

1814 1815 1816 1817
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1818
static bool isSumAvgRateQuery(SQuery *pQuery) {
1819
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1820
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1821 1822 1823
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1824

1825 1826 1827 1828 1829
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1830

1831 1832 1833
  return false;
}

H
hjxilinx 已提交
1834
// Returns true when at least one output expression is TSDB_FUNC_LAST_ROW.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++i;
  }

  return false;
}

H
hjxilinx 已提交
1845
/*
 * Decide whether the query needs an additional scan in the reverse direction.
 *
 * A reverse scan is required when
 *   - a first/first_dst function appears in a query that is not ascending, or
 *   - a last/last_dst function carries, in its first argument, an order value
 *     that differs from the order of the query.
 *
 * Timestamp and tag helper columns are ignored. Every output expression is
 * inspected: a last() whose order already matches must not hide a later
 * expression that does require the reverse scan.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG) {
      continue;
    }

    if ((functionId == TSDB_FUNC_FIRST || functionId == TSDB_FUNC_FIRST_DST) && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    if (functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_LAST_DST) {
      // presumably the scan order this last() was planned for; verify against the planner
      int32_t order = (int32_t)pQuery->pSelectExpr[i].base.arg->argValue.i64;
      if (order != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
1864

H
Haojun Liao 已提交
1865 1866 1867 1868
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
1869 1870
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1871 1872 1873
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
1874 1875 1876 1877

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
1878
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
1879 1880 1881
      return false;
    }
  }
1882

H
hjxilinx 已提交
1883 1884 1885
  return true;
}

1886 1887
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1888
/*
 * Compute the interval-aligned time window that contains key.
 *
 * win->skey is key truncated by taosTimeTruncate(). win->ekey is
 *   - INT64_MAX when keyFirst is so close to INT64_MAX that adding one
 *     interval would overflow (the whole query range is then shorter than one
 *     interval, so no further adjustment is needed);
 *   - taosTimeAdd(skey, interval) - 1 for the 'n' and 'y' interval units;
 *   - skey + interval - 1 otherwise.
 *
 * key must lie in [keyFirst, keyLast] and the sliding step must not exceed
 * the interval (both asserted).
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);

  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  bool usesTimeAdd = (pQuery->interval.intervalUnit == 'n') || (pQuery->interval.intervalUnit == 'y');
  if (usesTimeAdd) {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * It is 0 for top/bottom queries and for group-by-normal-column queries.
 * Otherwise it is 1 only when every output expression other than the
 * timestamp helper columns is flagged as multi-output, and 0 in all other
 * cases (including the case where no such expression exists).
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[funcId].nStatus);
    if (!allMultiOutput) {
      break;  // one single-output function settles the answer
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1932
bool colIdCheck(SQuery *pQuery) {
1933 1934
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1935
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1936
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1937 1938 1939
      return false;
    }
  }
1940

1941 1942 1943 1944 1945 1946
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
//
// Returns true when every output expression, apart from the timestamp and tag
// helper columns, is either functId or functIdDst.
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t current = pQuery->pSelectExpr[i].base.functionId;

    bool helperColumn = (current == TSDB_FUNC_TS) || (current == TSDB_FUNC_TS_DUMMY) ||
                        (current == TSDB_FUNC_TAG) || (current == TSDB_FUNC_TAG_DUMMY);
    if (helperColumn) {
      continue;
    }

    bool matches = (current == functId) || (current == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

// True when, helper columns aside, the query consists solely of first/first_dst expressions.
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// True when, helper columns aside, the query consists solely of last/last_dst expressions.
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1967
// todo refactor, add iterator
1968 1969
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
1970
  for(int32_t i = 0; i < t; ++i) {
1971
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
1972 1973 1974

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
1975
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
1976

1977 1978 1979 1980
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
1981 1982 1983 1984
    }
  }
}

1985
/*
 * Adjust the scan order (and, where needed, swap the query time window) for
 * query types that only make sense in one direction:
 *
 *   - last_row queries always scan ascending;
 *   - descending group-by-normal-column queries are turned into ascending ones;
 *   - point-interpolation queries without interval scan ascending;
 *   - first-only queries scan ascending and last-only queries descending, for
 *     non-interval queries and for interval queries on super tables.
 *
 * Whenever the window is swapped for the group-by and first/last cases, the
 * per-table last keys are updated through doExchangeTimeWindow().
 *
 * pQueryMsg is not used by this function.
 *
 * All debug lines log pQInfo itself. GET_QINFO_ADDR() must not be applied to
 * pQuery: the macro expects the address of the runtime environment and would
 * otherwise yield an unrelated pointer.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // in case of point-interpolation query, use asc order scan
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case where order-irrelevant query types are mixed with order-critical query types
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Initial number of result-buffer pages for the query:
 *   - 128 for group-by-normal-column queries;
 *   - max(number of tables, 16) for interval queries (one page per table);
 *   - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t pages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(pages > 0);
  return pages;
}

2095 2096
/*
 * Compute the row size and page size of the intermediate result buffer.
 *
 * *rowsize receives pQuery->rowSize scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT().
 * *ps starts at DEFAULT_INTERN_BUF_PAGE_SIZE and is doubled until a page,
 * minus its tFilePage header, can hold at least MIN_ROWS_PER_PAGE (4) rows.
 * pRuntimeEnv->numOfRowsPerPage is updated accordingly and asserted not to
 * exceed MAX_ROWS_PER_RESBUF_PAGE.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t MIN_ROWS_PER_PAGE = 4;
  int32_t headerBytes = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // grow the page until the payload area fits the minimum number of rows
  *ps = DEFAULT_INTERN_BUF_PAGE_SIZE;
  while (((*rowsize) * MIN_ROWS_PER_PAGE) > (*ps) - headerBytes) {
    *ps <<= 1u;
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2112
// True for every data type except TSDB_DATA_TYPE_BINARY and TSDB_DATA_TYPE_NCHAR.
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2113

H
Haojun Liao 已提交
2114 2115 2116 2117
/*
 * Use the pre-computed block statistics to decide whether a data block has to
 * be loaded.
 *
 * Returns true (load) when
 *   - no statistics are available, or there is neither a filter column nor a
 *     top/bottom function to evaluate;
 *   - a filter column has no statistics entry or is of a type that cannot be
 *     pre-filtered (binary/nchar);
 *   - a filter column contains only NULLs and one of its filters is the
 *     is-null filter;
 *   - any filter of any filter column accepts the [min, max] range of the block.
 * If none of the above fires and the query is a top/bottom query, the decision
 * is delegated to topbot_datablock_filter() for the first top/bottom output
 * column. Otherwise the block can be skipped and false is returned.
 *
 * NOTE(review): the top/bottom branch indexes pDataStatis with the output
 * column index, whereas the filter branch looks the entry up by column id -
 * confirm that both index spaces coincide for top/bottom queries.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && !pRuntimeEnv->topBotQuery) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    // locate the statistics entry that belongs to the filter column
    int32_t statIdx = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pFilterInfo->info.colId) {
        statIdx = c;
        break;
      }
    }

    // no statistics data, load the true data block
    if (statIdx == -1) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
      return true;
    }

    SDataStatis *pColStat = &pDataStatis[statIdx];

    // all data in current column are NULL, no need to check its boundary value
    if (pColStat->numOfNull == numOfRows) {
      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        if (pFilterInfo->pFilters[j].fp == isNull_filter) {
          return true;
        }
      }

      continue;
    }

    // For float columns the statistics are read as doubles and narrowed to
    // float before they are handed to the filters; all other types are
    // passed through as stored.
    float floatMin = 0;
    float floatMax = 0;
    char *lower = (char *)&pColStat->min;
    char *upper = (char *)&pColStat->max;

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
      floatMin = (float)(*(double *)(&pColStat->min));
      floatMax = (float)(*(double *)(&pColStat->max));
      lower = (char *)&floatMin;
      upper = (char *)&floatMax;
    }

    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pElem = &pFilterInfo->pFilters[j];
      if (pElem->fp(pElem, lower, upper)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
      if (funcId != TSDB_FUNC_TOP && funcId != TSDB_FUNC_BOTTOM) {
        continue;
      }

      return topbot_datablock_filter(&pCtx[i], funcId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
2188 2189 2190 2191 2192 2193 2194 2195
/*
 * Check whether a data block straddles a time-window boundary.
 *
 * Ascending scan: start from the aligned window that contains the block's
 * first key. Return true if that window ends before the block's last key, or
 * if one of the following windows starts strictly after the block's first key
 * while still starting at or before its last key.
 *
 * Descending scan: the mirrored test, starting from the window that contains
 * the block's last key and walking with GET_NEXT_TIMEWINDOW().
 *
 * Returns false when no such window is found.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  // normalised query range, independent of the scan direction
  TSKEY rangeStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY rangeEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, rangeStart, rangeEnd, &w);
    assert(w.ekey >= pBlockInfo->window.skey);

    if (w.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.skey > pBlockInfo->window.ekey) {
        return false;
      }

      assert(w.ekey > pBlockInfo->window.ekey);
      if (w.skey <= pBlockInfo->window.ekey && w.skey > pBlockInfo->window.skey) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, rangeStart, rangeEnd, &w);
  assert(w.skey <= pBlockInfo->window.ekey);

  if (w.skey > pBlockInfo->window.skey) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &w);
    if (w.ekey < pBlockInfo->window.skey) {
      return false;
    }

    assert(w.skey < pBlockInfo->window.skey);
    if (w.ekey < pBlockInfo->window.ekey && w.ekey >= pBlockInfo->window.skey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2237
/*
 * Decide how much of the current data block is needed and load that much.
 *
 * *status is set to one of
 *   BLK_DATA_NO_NEEDED      - the block is skipped entirely;
 *   BLK_DATA_STATIS_NEEDED  - only the block statistics are fetched; the data
 *                             block itself is loaded as a fallback when no
 *                             statistics exist;
 *   BLK_DATA_ALL_NEEDED     - statistics and data block are loaded;
 *   BLK_DATA_DISCARD        - the statistics prove that no row can pass the
 *                             filters.
 *
 * The whole block is requested when filter columns or a ts buffer exist, or
 * when an interval query's block overlaps a time-window boundary. Otherwise
 * each output function is asked through its dataReqFunc what it needs.
 *
 * Returns TSDB_CODE_SUCCESS, or terrno when a required data block could not
 * be retrieved.
 *
 * NOTE(review): a block marked BLK_DATA_DISCARD is still retrieved and counted
 * as loaded below - presumably an early return is possible; confirm that no
 * caller relies on *pDataBlock in that case.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  *status = BLK_DATA_NO_NEEDED;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        bool hasTimeWindow = false;
        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;

        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
            TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every output function; stop as soon as
      // one of them asks for the complete block
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;

      // neither statistics nor data: report the failure like the branch below does
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2322
/*
 * Binary search in an ascending array of num timestamps stored at pValue.
 *
 * order == TSDB_ORDER_DESC: returns the index of the last element that is not
 *   greater than key, or -1 when every element is greater than key.
 * order == TSDB_ORDER_ASC:  returns the index of the first element that is not
 *   smaller than key, or -1 when every element is smaller than key.
 * When the search lands exactly on key, the index it was found at is returned.
 *
 * Returns -1 for num <= 0. Any other order value trips the assertion.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }

      // here keys[lo] < key < keys[hi], so the range holds at least two elements
      int32_t mid = lo + ((hi - lo + 1) >> 1);

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }

    if (key == keys[hi]) {
      return hi;
    }

    if (key > keys[hi]) {
      // the element right behind the current range is the answer, if it exists
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    // here keys[lo] < key < keys[hi], so the range holds at least two elements
    int32_t mid = lo + ((hi - lo + 1) >> 1);

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397
/*
 * Resize every output column buffer of the query so that it can hold `capacity` rows.
 *
 * Nothing happens when `capacity` is smaller than the current capacity. If a
 * reallocation fails the function does not return: it longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: the product bytes * capacity may not fit in a 32-bit int
    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // the buffer may have moved, refresh the output position kept in the function context
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2413 2414 2415
/*
 * Make sure the output buffers can take all rows of the data block described by pBlockInfo.
 *
 * Only applies to queries that are neither interval queries, group-by-normal-column
 * queries, fixed-output queries nor ts-comp queries. When the free room
 * (capacity - rows) is smaller than the number of rows in the block, every output
 * column buffer is enlarged by the missing amount, the part behind the existing rows
 * is zeroed and the output pointers of the function contexts are re-anchored behind
 * the rows already produced.
 *
 * On allocation failure the function longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol && !isFixedOutputQuery(pRuntimeEnv) && !isTSCompQuery(pQuery)) {
    SResultRec *pRec = &pQuery->rec;

    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
      int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
      int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        int32_t bytes = pQuery->pSelectExpr[i].bytes;
        assert(bytes > 0 && newSize > 0);

        // compute the size in size_t: the product bytes * newSize may not fit in a 32-bit int
        size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

        char *tmp = realloc(pQuery->sdata[i], allocSize);
        if (tmp == NULL) {  // todo handle the oom
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        } else {
          // clear everything behind the rows that already exist in the buffer
          memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
          pQuery->sdata[i] = (tFilePage *)tmp;
        }

        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

        // top/bottom/diff write their timestamps through the output buffer of the first column
        int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }

      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
             newSize, pRec->capacity, newSize - pRec->rows);

      pRec->capacity = newSize;
    }
  }
}

2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472
/*
 * Initialise the start time / previous start key of the window result info from the
 * first data block that is seen.
 *
 * Only acts for interval queries whose prevSKey still holds TSKEY_INITIAL_VAL.
 * The aligned window is computed by getAlignQueryTimeWindow(); for ascending queries
 * both startTime and prevSKey take the aligned start key, for descending queries
 * startTime takes the start key of the query window instead.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo* pInfo = &pRuntimeEnv->windowResInfo;

  // nothing to do for non-interval queries or when the window has been initialised already
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
    pInfo->startTime = win.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);
    pInfo->startTime = pQuery->window.skey;
  }

  pInfo->prevSKey = win.skey;
}

2473 2474
/*
 * Iterate over the data blocks delivered by the query handle of the current scan
 * (primary handle for the master scan, secondary handle otherwise) and apply the
 * query functions on each block.
 *
 * The loop stops when the handle has no more blocks, when loading a block fails, or
 * when the query status reports a full result buffer / completion. A killed query or
 * a non-zero terrno after the loop leaves the function through longjmp() on
 * pRuntimeEnv->env. Always returns 0 when it returns normally.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT handle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  } else {
    handle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, handle, &blockInfo, &pStatis,
                                         &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is skipped entirely: move the last key just past the block in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv))) {
    return 0;
  }

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
    pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
  } else {
    assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2553
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2554
  tVariantDestroy(tag);
2555

2556
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2557
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2558
    assert(val != NULL);
2559

H
[td-90]  
Haojun Liao 已提交
2560
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2561
  } else {
2562
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2563 2564 2565 2566
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2567

H
hjxilinx 已提交
2568
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2569
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2570 2571 2572 2573
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2574
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2575
    } else {
H
Haojun Liao 已提交
2576 2577 2578 2579 2580
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2581
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2582
    }
2583
  }
2584 2585
}

2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597
/*
 * Linear lookup of a tag column description by column id.
 * Returns a pointer into pTagColList, or NULL when no entry carries `colId`.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t idx = 0;
  while (idx < numOfTags) {
    SColumnInfo* pCol = &pTagColList[idx++];
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2598
/*
 * Populate the tag variants of the function contexts with the tag values of pTable.
 *
 * - A query whose single output is TSDB_FUNC_TS_COMP only needs the tag column named
 *   by the first function argument; it is stored in pCtx[0].tag.
 * - Otherwise every output expression flagged as a tag column gets its tag value, and
 *   when the first expression is a ts/prj function on the primary timestamp column
 *   while a ts buffer exists, the tag named by its argument is additionally loaded
 *   into pCtx[0].tag (and logged) for the join comparison.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag, pExpr->type,
                           pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;
  if (!((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
        pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX)) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      joinTagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
  SColumnInfo *pJoinColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinTagColId);

  doSetTagValueInParam(tsdb, pTable, joinTagColId, &pRuntimeEnv->pCtx[0].tag, pJoinColInfo->type, pJoinColInfo->bytes);

  int16_t tagType = pRuntimeEnv->pCtx[0].tag.nType;
  if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pFirstExpr->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pFirstExpr->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64Key);
  }
}

/*
 * Feed one window result into the merge functions of all output columns.
 *
 * When mergeFlag is false a new output row is started first: each context advances its
 * output pointer by one row, is switched to FIRST_STAGE_MERGE and re-initialised. The
 * input of every context is then pointed at the column data of pWindowRes inside its
 * result page; tag / tag-dummy columns additionally rebuild the context tag from that
 * input. Finally distMergeFunc is invoked for every column except tag-dummy ones.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  // first pass: prepare output position and input buffer of every column
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pc = &pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf += pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[functionId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);

    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pc->tag);

    int32_t type = pc->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // second pass: run the merge function of every column that produces data
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtx[i]);
    }
  }
}

2692
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2761
/*
 * Debug helper: dump `numOfRows` rows of the intermediate result buffers to stdout,
 * one line per row and one tab-separated field per output column. Columns are stored
 * separately in pdata[col]; the value of a row is located at bytes * row inside the
 * column buffer. Column types other than the ones handled below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo* pExpr = &pQuery->pSelectExpr[col];
      char*      cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY:
          printBinaryData(pExpr->base.functionId, cell, pExpr->type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2796 2797 2798
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2799 2800 2801 2802 2803
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2804

2805 2806
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2807

2808 2809
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2810

2811 2812 2813 2814
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2815

2816 2817 2818 2819
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2820

H
hjxilinx 已提交
2821
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2822
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2823
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2824

H
Haojun Liao 已提交
2825
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2826
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2827

H
hjxilinx 已提交
2828
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2829
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2830
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2831

H
Haojun Liao 已提交
2832
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2833
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2834

2835 2836 2837
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2838

2839 2840 2841
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2842
/*
 * Merge the results of the table groups, starting at pQInfo->groupIndex, until one
 * group yields at least one result or all groups have been visited.
 *
 * The stable query is flagged as over when the last group has been processed and all
 * of its result pages have been consumed. The elapsed time is added to the
 * firstStageMergeTime statistic.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when merging a group failed.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startUs = taosGetTimestampUs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t code = mergeIntoGroupResultImpl(pQInfo, group);
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (code > 0) {
      break;
    }

    assert(pQInfo->groupResInfo.numOfDataPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  SGroupResInfo *pGroupRes = &pQInfo->groupResInfo;
  if (pQInfo->groupIndex == numOfGroups && pGroupRes->pos.pageId == pGroupRes->numOfDataPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

/*
 * Copy merged group results from the disk-based result buffer into the output
 * buffers of the query (pQuery->sdata), at most pQuery->rec.capacity rows.
 *
 * When all pages of the current group have been returned, the next group is merged
 * first; if no group is left the stable query is flagged as over and nothing is copied.
 * The read position (page id / row id inside the page) is kept in
 * pQInfo->groupResInfo.pos so that a later call continues where this one stopped.
 * On return pQuery->rec.rows holds the number of rows copied.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  // all results have been return to client, try next group
  if (pGroupResInfo->pos.pageId == pGroupResInfo->numOfDataPages) {
    pGroupResInfo->numOfDataPages = 0;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pGroupResInfo->numOfDataPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = pQInfo->groupResInfo.groupId;
  SIDList list = getDataBufPagesIdList(pResultBuf, id);

  int32_t offset = 0;  // number of rows already written to the output buffers
  int32_t numOfCopiedRows = 0;

  size_t size = taosArrayGetSize(list);
  assert(size == pGroupResInfo->numOfDataPages);

  bool done = false;
  for (int32_t j = pGroupResInfo->pos.pageId; j < size; ++j) {
    SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
    tFilePage* pData = getResBufPage(pResultBuf, pi->pageId);

    assert(pData->num > 0 && pData->num <= pRuntimeEnv->numOfRowsPerPage && pGroupResInfo->pos.rowId < pData->num);

    // first row of this page that has not been returned yet; non-zero when the previous
    // call stopped in the middle of the page
    int32_t startRow = (int32_t)pGroupResInfo->pos.rowId;
    int32_t numOfRes = (int32_t)(pData->num - startRow);

    if (numOfRes > pQuery->rec.capacity - offset) {
      // the output buffer can not take the rest of the page: copy what fits and stop
      numOfCopiedRows = (int32_t)(pQuery->rec.capacity - offset);
      pGroupResInfo->pos.rowId += numOfCopiedRows;
      done = true;
    } else {
      // only the rows not returned so far are left to copy, not the whole page
      numOfCopiedRows = numOfRes;

      pGroupResInfo->pos.pageId += 1;
      pGroupResInfo->pos.rowId = 0;
    }

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
      char *  pDest = pQuery->sdata[i]->data;

      // column i starts at offset[i] * numOfRowsPerPage inside the page, rows follow each
      // other with `bytes` per row; skip the rows that were handed out before
      char *pSrc = pData->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage + (size_t)startRow * bytes;

      memcpy(pDest + offset * bytes, pSrc, (size_t)bytes * numOfCopiedRows);
    }

    offset += numOfCopiedRows;
    if (done) {
      break;
    }
  }

  assert(pQuery->rec.rows == 0);
  pQuery->rec.rows += offset;
}

H
Haojun Liao 已提交
2950
/*
 * Number of result rows held by a window result.
 *
 * The count is taken from the first output column that is not a ts/tag/tagprj
 * function and reports more than zero results; 0 is returned when there is none.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  int32_t numOfOutput = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool decidesCount =
        (functionId != TSDB_FUNC_TS) && (functionId != TSDB_FUNC_TAG) && (functionId != TSDB_FUNC_TAGPRJ);
    if (!decidesCount) {
      continue;
    }

    SResultInfo *pInfo = &pWindowRes->resultInfo[col];
    assert(pInfo != NULL);

    if (pInfo->numOfRes > 0) {
      return pInfo->numOfRes;
    }
  }

  return 0;
}

2973
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
2974
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
2975
  SQuery *          pQuery = pRuntimeEnv->pQuery;
2976

2977
  size_t size = taosArrayGetSize(pGroup);
2978
  tFilePage **buffer = pQuery->sdata;
2979

H
Haojun Liao 已提交
2980
  int32_t *posList = calloc(size, sizeof(int32_t));
H
hjxilinx 已提交
2981
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);
2982

2983
  if (pTableList == NULL || posList == NULL) {
S
Shengliang Guan 已提交
2984 2985
    taosTFree(posList);
    taosTFree(pTableList);
2986 2987

    qError("QInfo:%p failed alloc memory", pQInfo);
H
Haojun Liao 已提交
2988
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
2989 2990
  }

2991
  // todo opt for the case of one table per group
2992
  int32_t numOfTables = 0;
H
Haojun Liao 已提交
2993 2994 2995
  SIDList pageList = NULL;
  int32_t tid = -1;

2996
  for (int32_t i = 0; i < size; ++i) {
2997
    STableQueryInfo *item = taosArrayGetP(pGroup, i);
2998

H
Haojun Liao 已提交
2999
    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
H
Haojun Liao 已提交
3000
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
H
Haojun Liao 已提交
3001
      pTableList[numOfTables++] = item;
3002 3003
      tid = TSDB_TABLEID(item->pTable)->tid;
      pageList = list;
3004 3005
    }
  }
3006

H
Haojun Liao 已提交
3007
  // there is no data in current group
3008
  if (numOfTables == 0) {
S
Shengliang Guan 已提交
3009 3010
    taosTFree(posList);
    taosTFree(pTableList);
3011
    return 0;
H
Haojun Liao 已提交
3012
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
H
Haojun Liao 已提交
3013 3014 3015 3016 3017
    taosTFree(posList);
    taosTFree(pTableList);

    SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

3018
    pGroupResInfo->numOfDataPages = (int32_t)taosArrayGetSize(pageList);
H
Haojun Liao 已提交
3019 3020 3021 3022 3023
    pGroupResInfo->groupId = tid;
    pGroupResInfo->pos.pageId = 0;
    pGroupResInfo->pos.rowId = 0;

    return pGroupResInfo->numOfDataPages;
3024
  }
3025

3026
  SCompSupporter cs = {pTableList, posList, pQInfo};
3027

3028
  SLoserTreeInfo *pTree = NULL;
3029
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
3030

3031
  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
H
Haojun Liao 已提交
3032 3033 3034 3035
  if (pResultInfo == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

H
Haojun Liao 已提交
3036
  char* buf = calloc(1, pRuntimeEnv->interBufSize);
H
Haojun Liao 已提交
3037 3038 3039 3040
  if (buf == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

H
Haojun Liao 已提交
3041
  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
3042
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
3043

H
Haojun Liao 已提交
3044 3045
  pQInfo->groupResInfo.groupId = getGroupResultId(pQInfo->groupIndex);

H
Haojun Liao 已提交
3046
  // todo add windowRes iterator
3047 3048
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();
3049

3050
  while (1) {
3051 3052
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);
H
Haojun Liao 已提交
3053 3054 3055 3056 3057 3058 3059

      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pTree);
      taosTFree(pResultInfo);
      taosTFree(buf);

3060 3061 3062
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

3063
    int32_t pos = pTree->pNode[0].index;
3064

H
hjxilinx 已提交
3065
    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
H
Haojun Liao 已提交
3066
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
H
Haojun Liao 已提交
3067
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
3068

H
Haojun Liao 已提交
3069
    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
3070
    TSKEY ts = GET_INT64_VAL(b);
3071

3072
    assert(ts == pWindowRes->win.skey);
H
Haojun Liao 已提交
3073
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
3074 3075
    if (num <= 0) {
      cs.position[pos] += 1;
3076

3077 3078
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;
3079

3080
        // all input sources are exhausted
3081
        if (--numOfTables == 0) {
3082 3083 3084 3085 3086 3087 3088
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
3089
        if (buffer[0]->num == pQuery->rec.capacity) {
H
Haojun Liao 已提交
3090
          if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
3091 3092
            return -1;
          }
3093

3094 3095
          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }
3096

3097
        doMerge(pRuntimeEnv, ts, pWindowRes, false);
3098
        buffer[0]->num += 1;
3099
      }
3100

3101
      lastTimestamp = ts;
3102

H
Haojun Liao 已提交
3103 3104 3105
      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

3106 3107 3108
      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;
3109

3110
        // all input sources are exhausted
3111
        if (--numOfTables == 0) {
3112 3113
          break;
        }
H
Haojun Liao 已提交
3114 3115 3116 3117 3118 3119
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
3120 3121
      }
    }
3122

3123 3124
    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }
3125

3126
  if (buffer[0]->num != 0) {  // there are data in buffer
H
Haojun Liao 已提交
3127
    if (flushFromResultBuf(pRuntimeEnv, &pQInfo->groupResInfo) != TSDB_CODE_SUCCESS) {
S
slguan 已提交
3128
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
3129

S
Shengliang Guan 已提交
3130 3131 3132 3133
      taosTFree(pTree);
      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pResultInfo);
3134

3135 3136 3137
      return -1;
    }
  }
3138

3139 3140 3141
  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
3142
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
3143
#endif
3144

3145
  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);
3146

S
Shengliang Guan 已提交
3147 3148 3149
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);
3150

S
Shengliang Guan 已提交
3151 3152
  taosTFree(pResultInfo);
  taosTFree(buf);
H
Haojun Liao 已提交
3153 3154

  return pQInfo->groupResInfo.numOfDataPages;
3155 3156
}

H
Haojun Liao 已提交
3157 3158
/**
 * Copy the rows currently held in pQuery->sdata[] into freshly obtained pages of the
 * runtime result buffer, one page at a time.
 *
 * The number of rows to flush is read from pQuery->sdata[0]->num. Each page receives at
 * most pResultBuf->numOfRowsPerPage rows, and pGroupResInfo->numOfDataPages is increased
 * by one for every page that has been filled.
 *
 * @param pRuntimeEnv   runtime environment supplying the query, the function contexts and
 *                      the result buffer
 * @param pGroupResInfo group bookkeeping; groupId is handed to getNewDataBuf() and
 *                      numOfDataPages is updated
 * @return TSDB_CODE_SUCCESS when every row has been copied; TSDB_CODE_QRY_OUT_OF_MEMORY
 *         when a new page cannot be obtained (pages filled before the failure stay counted)
 */
int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // page id is an output of getNewDataBuf(); -1 is only the initial placeholder
  int32_t pageId = -1;
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  int32_t remain = (int32_t) pQuery->sdata[0]->num;
  int32_t offset = 0;  // index of the first row that has not been flushed yet

  while (remain > 0) {
    int32_t rows = (remain > capacity)? capacity:remain;
    assert(rows > 0);

    // get the output buffer page
    tFilePage *buf = getNewDataBuf(pResultBuf, pGroupResInfo->groupId, &pageId);
    if (buf == NULL) {
      // previously the page was dereferenced unconditionally; report the failure so that
      // the callers' existing "!= TSDB_CODE_SUCCESS" handling can take effect
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = rows;

    // copy "rows" values of every output column into the column's slot of the page
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      char* output = buf->data + pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage;
      char* src = ((char *) pQuery->sdata[i]->data) + offset * bytes;
      memcpy(output, src, (size_t)(rows * bytes));
    }

    offset += rows;
    remain -= rows;

    pGroupResInfo->numOfDataPages += 1;
  }

  return TSDB_CODE_SUCCESS;
}

/**
 * Rewind the per-column merge output state so that merging restarts on an empty buffer.
 *
 * For every output column the row counter of pQuery->sdata[col] is cleared, the context is
 * bound to pResultInfo[col], and the context output pointer is placed one element
 * (outputBytes bytes) in front of the column data.
 *
 * NOTE(review): the output pointer intentionally starts before the buffer; presumably the
 * merge routine advances it before the first write -- confirm against the caller.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pCtx[col];

    // no rows are buffered any more for this column
    pQuery->sdata[col]->num = 0;

    pColCtx->resultInfo  = &pResultInfo[col];
    pColCtx->startOffset = 0;
    pColCtx->size        = 1;
    pColCtx->aOutputBuf  = pQuery->sdata[col]->data - pColCtx->outputBytes;
  }
}

3206 3207 3208 3209
/**
 * Prepare one table's query info for the reverse scan: trim the window to what has been
 * scanned, swap its endpoints, flip the cursor order and park the window cursor on the
 * last window result.
 *
 * The caller is expected to have switched pQuery->order.order already, so "step" is the
 * direction factor of the reverse scan. A NULL pTableQueryInfo is ignored.
 *
 * TODO: the relation between win.ekey and lastKey + step was once asserted here
 *       (ekey >= lastKey + step for descending, <= for ascending) but still needs validation.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  STimeWindow    *pWin = &pTableQueryInfo->win;
  SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // lastKey == skey means nothing was produced, so the window is left untouched
  if (pTableQueryInfo->lastKey != pWin->skey) {
    pWin->ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pWin->skey, pWin->ekey, TSKEY);
  pTableQueryInfo->lastKey = pWin->skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pWindowResInfo->curIndex = pWindowResInfo->size - 1;
}

/**
 * Mark, for every closed window result, which output functions still have to run during
 * the reverse scan.
 *
 * FIRST/FIRST_DST under ascending order and LAST/LAST_DST under descending order are
 * re-opened (complete = false). Every other function except TS and TAG is marked complete.
 * TS and TAG entries are left as they are. Windows that are not closed are skipped.
 *
 * @param pQInfo         query info, used to reach the query's select expressions
 * @param pWindowResInfo window results to update
 * @param order          value of pQuery->order.order supplied by the caller
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!getTimeWindowResStatus(pWindowResInfo, w)) {
      continue;  // only closed windows are touched
    }

    SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pQuery->pSelectExpr[col].base.functionId;

      bool firstInAsc = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
      bool lastInDesc = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

      if (firstInAsc || lastInDesc) {
        pWinRes->resultInfo[col].complete = false;
      } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
        pWinRes->resultInfo[col].complete = true;
      }
    }
  }
}

3262 3263
/**
 * Decide which output functions take part in the upcoming reverse scan.
 *
 * For group-by-normal-column and interval queries the decision is made per window result
 * through disableFuncInReverseScanImpl(). Otherwise it is applied directly to the result
 * info bound to each function context: FIRST/FIRST_DST under ascending order and
 * LAST/LAST_DST under descending order are re-opened, everything else except TS and TAG is
 * marked complete.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple (single result) table query; todo refactor
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    if (pCtx->resultInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool firstInAsc = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
    bool lastInDesc = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

    if (firstInAsc || lastInDesc) {
      pCtx->resultInfo->complete = false;
    } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/**
 * Walk every table of every table group, switch its query info to the reverse scan range
 * and mirror the resulting lastKey into the matching STableKeyInfo entry.
 *
 * The STableKeyInfo list (pQInfo->tableGroupInfo.pGroupList) is what gets passed to
 * tsdbQueryTables(), so its lastKey decides where the new query handle starts checking.
 * Entry j of a table group and entry j of the key group are expected to describe the same
 * table; this is asserted.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTableGroup = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pTableGroup);
    for (int32_t j = 0; j < numOfTables; ++j) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pTableGroup, j);
      updateTableQueryInfoForReverseScan(pQuery, pTableQueryInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the
      // tsdbQueryHandle and decide the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyGroup, j);
      pKeyInfo->lastKey = pTableQueryInfo->lastKey;

      assert(pTableQueryInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3313
/** Flip the scan order stored in every output function context of the query. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
    ++col;
  }
}

H
Haojun Liao 已提交
3320
/**
 * Allocate the result-info block of one window result.
 *
 * A single zeroed allocation holds pQuery->numOfOutput SResultInfo entries followed by
 * interBufSize bytes of intermediate buffer space; the trailing area is handed to
 * setWindowResultInfo(). The page position of the row is reset to {-1, -1}.
 *
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if the allocation fails
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer starts right behind the SResultInfo array
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/**
 * Point every function context back at the start of its column in pQuery->sdata[], reset
 * and rebind the runtime result info, zero the column buffer, and finally re-run the
 * function initializers through initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    // rebind the output/intermediate result info; not every function needs the
    // intermediate buffer (e.g. COUNT/TAGPRJ/PRJ/TAG)
    RESET_RESULT_INFO(&pRuntimeEnv->resultInfo[col]);
    pCtx->resultInfo = &pRuntimeEnv->resultInfo[col];

    // top/bottom/diff write their timestamps through the first output column
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/**
 * Advance the output positions of all function contexts by "output" rows and reset their
 * result info.
 *
 * Only functions flagged IS_OUTER_FORWARD move their aOutputBuf here. For TOP/BOTTOM the
 * timestamp output pointer is moved as well (TSDB_KEYSIZE bytes per row), because those
 * functions write the timestamp column themselves. DIFF must not reach this function
 * (asserted); the original note says it is handled by the multi-output path.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    assert(fid != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[fid].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    // the timestamp column of top/bottom is filled inside the function via ptsOutputBuf
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/**
 * Reset currentStage of every function context to 0 and call the function's init hook for
 * those contexts whose result info is not yet marked as initialized.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[fid].init(pCtx);
    }
  }
}

3408
/**
 * Apply the pending result offset (pQuery->limit.offset) to the rows currently held in the
 * output buffers.
 *
 * - Nothing happens when there are no rows or no offset.
 * - When the buffered rows do not exceed the offset, all of them are dropped: the offset is
 *   reduced by the row count, the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 * - Otherwise the first "offset" rows of every column are removed by shifting the remaining
 *   rows to the front, the context output pointers are placed behind the surviving rows and
 *   the result count is updated.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // the whole batch is consumed by the offset
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only a prefix of the batch is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    int32_t bytes = pCtx->outputBytes;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[col]->data, (char*)pQuery->sdata[col]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pCtx->aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

    if (fid == TSDB_FUNC_DIFF || fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/**
 * Update the status bits of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed into the status
 * after the QUERY_NOT_COMPLETED bit has been cleared, since that bit is not compatible with
 * any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/**
 * Invoke the xNextStep hook of every output function (TS excluded) and report whether any
 * of them is still incomplete afterwards.
 *
 * For group-by-normal-column and interval queries this is done once per closed window
 * result, after binding the contexts to that window via setWindowResOutputBuf(); otherwise
 * it is done once on the current contexts.
 *
 * @return true if at least one visited result info has complete == false
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    again = false;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t fid = pQuery->pSelectExpr[col].base.functionId;
      if (fid == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[fid].xNextStep(&pRuntimeEnv->pCtx[col]);
      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (!pResInfo->complete) {
        again = true;
      }
    }

    return again;
  }

  // windowed query: advance the functions of each closed window result
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);
    if (!pWinRes->closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t fid = pQuery->pSelectExpr[col].base.functionId;
      if (fid == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[fid].xNextStep(&pRuntimeEnv->pCtx[col]);
      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (!pResInfo->complete) {
        again = true;
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3504
/**
 * Take a snapshot of the state that scanOneTableDataBlocks() needs to restore or reuse
 * later: the query status, the current window-result index, the start key, the query
 * window (info.w) and the current table's window with its skey replaced by "start"
 * (info.curWindow).
 *
 * "start" must not lie beyond the table's lastKey in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pTableQueryInfo->lastKey);
  } else {
    assert(start >= pTableQueryInfo->lastKey);
  }

  SQueryStatusInfo info = {
      .lastKey     = start,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .status      = pQuery->status,
  };

  TIME_WINDOW_COPY(info.w, pQuery->window);

  TIME_WINDOW_COPY(info.curWindow, pTableQueryInfo->win);
  info.curWindow.skey = start;

  return info;
}

3525 3526 3527 3528
/**
 * Switch the runtime environment into reverse-scan mode.
 *
 * Steps, in order: save the ts-buffer cursor into pStatus->cur (and, if a ts buffer exists,
 * flip its order and step to the next position); install the swapped pStatus->curWindow as
 * the query window; flip the query order; set the REVERSE_SCAN flag; reset the query status;
 * flip the context orders; select the functions that take part in the reverse scan; update
 * the per-table ranges; and finally replace pSecQueryHandle with a handle created for the
 * new range.
 *
 * Does not return when tsdbQueryTables() fails: control is transferred with
 * longjmp(pRuntimeEnv->env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor so that clearEnvAfterReverseScan() can put it back
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);

    // keep the call outside of assert() so it is still executed when asserts are disabled
    bool ret = tsBufNextPos(pRuntimeEnv->pTSBuf);
    assert(ret);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch, the window must be consistent with the new direction
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3574 3575
/**
 * Undo setEnvBeforeReverseScan(): flip the query and context orders back, restore the saved
 * ts-buffer cursor, return to MASTER_SCAN, and restore lastKey, status and the time windows
 * from the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    // the cursor order follows the (restored) query order
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // bring back the state captured before the reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3596 3597 3598 3599 3600 3601 3602
/**
 * Reset the lastKey of the single table in pTableGroupInfo to the start key of the query
 * condition. Only valid when the group info holds exactly one table (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3603
/**
 * Scan the data blocks of the current table starting at "start".
 *
 * 1. Master scan, followed by as many repeat scans as needScanDataBlocksAgain() asks for.
 *    Every repeat scan re-creates pSecQueryHandle for the range recorded in
 *    qstatus.curWindow and runs with scanFlag == REPEAT_SCAN.
 * 2. If needReverseScan() says so, one reverse scan, wrapped by
 *    setEnvBeforeReverseScan()/clearEnvAfterReverseScan().
 *
 * Does not return on failure to create a query handle (longjmp with terrno) or when the
 * query has been killed (longjmp with TSDB_CODE_TSC_QUERY_CANCELLED).
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;

      // lastKey only moves when qualified data blocks were found; in that case the range
      // for repeat/reverse scans ends right before it
      if (qstatus.lastKey != pTableQueryInfo->lastKey) {
        qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    // another pass is required: build a new handle over the already scanned range
    STsdbQueryCond cond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3683
/**
 * Run the xFinalize hook of every output function.
 *
 * Plain queries finalize the current contexts once. Group-by-normal-column and interval
 * queries finalize every closed window result (for group-by-normal-column all windows are
 * closed first) and store the resulting row count in the window result.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pWinRes = &pWindowResInfo->pResult[w];
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    // number of output rows of this window; usually 1, except for top/bottom queries
    pWinRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/**
 * Tell whether the select list contains at least one function other than TS, TAG and
 * TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3731
/**
 * Initialize a STableQueryInfo inside caller-provided storage.
 *
 * The window and lastKey are taken from "win", the ts cursor is marked unset
 * (vgroupIndex = -1), and for interval / group-by-normal-column queries the per-table
 * window result info is initialized. Other queries leave windowResInfo untouched.
 *
 * @param buf storage for the STableQueryInfo; it is not allocated here
 * @return buf viewed as STableQueryInfo, or NULL if initWindowResInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  bool needWindowRes = QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol;
  if (!needWindowRes) {
    // in other aggregate query, do not initialize the windowResInfo
    return pInfo;
  }

  // set more initial size of interval/groupby query
  int32_t initialSize = 16;
  int32_t initialThreshold = 100;

  int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  return (code == TSDB_CODE_SUCCESS) ? pInfo : NULL;
}

H
Haojun Liao 已提交
3756
/**
 * Release the window result info owned by a table query info. The STableQueryInfo storage
 * itself is not freed here. NULL is accepted and ignored.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3767
 * @param pDataBlockInfo
3768
 */
H
Haojun Liao 已提交
3769
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3770
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3771 3772 3773
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3774 3775
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3776 3777 3778 3779

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3780

H
Haojun Liao 已提交
3781 3782 3783
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3784

3785 3786
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3787 3788 3789
  if (pWindowRes == NULL) {
    return;
  }
3790

3791 3792 3793 3794 3795
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3796
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3797 3798 3799 3800
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3801

H
Haojun Liao 已提交
3802 3803
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3804 3805 3806 3807
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3808
/**
 * Bind every function context to the given window result: output position inside the
 * result page, timestamp output for TOP/BOTTOM/DIFF, result info, and the super-table flag.
 *
 * Unlike setWindowResOutputBufInitCtx(), no function initializer is called here.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // the page that holds this window result
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    // top/bottom/diff write their timestamps through the first output column
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // output/intermediate result info of this column; not every function uses the
    // intermediate buffer (e.g. COUNT)
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3835 3836
/**
 * Bind every function context to the given window result and initialize the functions that
 * have not been initialized yet.
 *
 * A context whose result info is both initialized and complete only gets its resultInfo
 * pointer updated; output position, stage and flags are left as they were.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // the page that holds this window result
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    pCtx->resultInfo = &pResult->resultInfo[col];
    if (pCtx->resultInfo->initialized && pCtx->resultInfo->complete) {
      continue;  // nothing more to compute for this column
    }

    pCtx->currentStage = 0;
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    int32_t fid = pCtx->functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // set super table query flag
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

    if (!pCtx->resultInfo->initialized) {
      aAggs[fid].init(pCtx);
    }
  }
}

3869
/**
 * Set the tag values for the given table and, when a ts buffer is present, position the ts
 * buffer for it.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the tag of the first context is
 * copied into pTableQueryInfo->tag, the start position for (vgId, tag) is looked up and the
 * resulting cursor is stored in pTableQueryInfo->cur. On later visits the stored cursor is
 * put back into the ts buffer. Both the master and the supplementary scan need this.
 *
 * @return always 0.
 *         NOTE(review): the lookup-failure path returns `false`, which is also 0, so callers
 *         cannot tell failure from success -- confirm whether an error code was intended.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (elem.vnode < 0) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return false;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  }

  // both branches report the cursor that is now in effect
  if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3925
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3926 3927
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3928
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3929

3930 3931 3932
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3933
    pTableQueryInfo->win.skey = key;
3934
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3935

3936 3937 3938 3939 3940
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3941

3942 3943 3944 3945 3946 3947
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3948
    STimeWindow     w = TSWINDOW_INITIALIZER;
3949
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3950

H
Haojun Liao 已提交
3951 3952
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3953
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3954
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3955

3956 3957
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3958
        assert(win.ekey == pQuery->window.ekey);
3959
      }
3960

3961
      pWindowResInfo->prevSKey = w.skey;
3962
    }
3963

3964
    pTableQueryInfo->queryRangeSet = 1;
3965
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3966 3967 3968 3969
  }
}

/*
 * Return true if at least one output expression uses a function whose status flags
 * include TSDB_FUNCSTATE_NEED_TS, false otherwise.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    ++idx;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of the given data block has to be loaded.
 *
 * It is required when:
 * 1. the last key of the current table or the end key of the query window falls inside the block window,
 *    so the timestamps are needed to decide the precise position;
 * 2. any output function requires the timestamp (see requireTimestamp()).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  const STimeWindow *pBlockWin = &pDataBlockInfo->window;

  TSKEY lastKey = pQuery->current->lastKey;
  if (lastKey >= pBlockWin->skey && lastKey <= pBlockWin->ekey) {
    return true;
  }

  TSKEY endKey = pQuery->window.ekey;
  if (endKey >= pBlockWin->skey && endKey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3993
/*
 * Copy the rows of the closed window results into the output buffer of the query (pQuery->sdata).
 *
 * The windows are visited starting from pQInfo->groupIndex, forward for TSDB_ORDER_ASC and backward
 * otherwise. Copying stops as soon as pQuery->rec.capacity rows have been produced; the position inside
 * a partially copied window is remembered in pQInfo->groupResInfo.pos.rowId.
 *
 * Returns the number of rows copied by this call.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  // descending order walks the closed windows from the tail
  const bool ascending = (orderType == TSDB_ORDER_ASC);
  int32_t    direction = ascending ? 1 : -1;
  int32_t    first = ascending ? pQInfo->groupIndex : (numOfClosed - pQInfo->groupIndex - 1);

  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  int32_t        copied = 0;

  for (int32_t idx = first; (idx >= 0) && (idx < numOfClosed); idx += direction) {
    SWindowResult *pWin = &pWindows[idx];

    // empty window: nothing to copy, move on to the next one
    if (pWin->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pGroupResInfo->pos.rowId = 0;
      continue;
    }

    int32_t srcRow = pGroupResInfo->pos.rowId;
    int32_t rows = pWin->numOfRows - pGroupResInfo->pos.rowId;

    if (rows > pQuery->rec.capacity - copied) {
      // the output space cannot hold the rest of this window: copy what fits and remember the position
      rows = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->pos.rowId += rows;
    } else {
      // the window is consumed completely
      pGroupResInfo->pos.rowId = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *pPage = getResBufPage(pEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * width;
      char *src = getPosInResultPage(pEnv, col, pWin, pPage);
      memcpy(dst, src + srcRow * width, width * rows);
    }

    copied += rows;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
4070
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
4071
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4072

4073
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4074
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
4075

4076
  pQuery->rec.rows += numOfResult;
4077

4078
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4079 4080
}

H
Haojun Liao 已提交
4081
/*
 * For non-interval queries, raise the row count of every window result to the largest number of
 * results reported by its output functions. TS, TAG and TAGPRJ outputs are not taken into account.
 * Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWinInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWinInfo->size; ++w) {
    SWindowResult *pWin = &pWinInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool counted = !(fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
      if (counted) {
        pWin->numOfRows = (uint16_t)(MAX(pWin->numOfRows, pWin->resultInfo[col].numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4103
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4104
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4105
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4106
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4107

4108
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4109
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4110

H
Haojun Liao 已提交
4111
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4112
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
4113
  } else {
4114
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
4115 4116 4117
  }
}

H
Haojun Liao 已提交
4118
/*
 * Check whether a table query still has results that were not returned to the client.
 *
 * Returns false once the limit has been reached. For fill queries (except point interpolation) the
 * answer depends on the rows remaining in the fill info and, if the query is completed, on the number
 * of rows that can still be filled up to the end of the query window. For the other queries there are
 * remaining results if the query is completed, it is a group-by-column or interval query, and the
 * window result list is not empty.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFill = pRuntimeEnv->pFillInfo;

  // limit reached, nothing more is returned
  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // there are results waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // results not returned to the client yet have to be filled first
  int32_t remain = taosNumOfRemainRows(pFill);
  if (remain > 0) {
    return true;
  }

  /*
   * No buffered rows remain at this point.
   * If the query is not completed yet, the gaps between two result blocks are handled after the next
   * data block is retrieved from TSDB, so nothing is reported here.
   *
   * NOTE: if the result set is not the first block, the gap in front of the result set is filled. If the
   * result set is the FIRST result block, the gap between the start of the query time window and the
   * timestamp of the first result row is not filled.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getFilledNumOfRes(pFill, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response payload.
 *
 * Layout written to "data":
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer in network byte order;
 *   3. one STableIdInfo per entry, with uid/tid/key converted to big-endian.
 *
 * After the copy, a completed query is switched to QUERY_OVER when no more results remain.
 *
 * @param pQInfo     query info object
 * @param numOfRows  number of rows to copy from every output column
 * @param data       destination buffer; the caller must provide enough space
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  /*
   * The write position depends on the column widths and the row count, so it is not guaranteed to be
   * suitably aligned for int32_t/STableIdInfo. Build the values in properly aligned locals and copy the
   * bytes instead of storing through a cast pointer.
   */
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for (int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // Check if the query is over, for super table query and normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4195
/*
 * Generate filled result rows into pQuery->sdata and apply the pending offset of the query.
 *
 * Blocks are generated until one of the following happens:
 *  - the offset is 0: the generated block is returned as is;
 *  - the offset is smaller than the generated block: the first "offset" rows are dropped by moving the
 *    remaining rows of every pDst column to the front, the offset is cleared, and the remaining row
 *    count is returned;
 *  - the whole block is consumed by the offset and no more results remain: 0 is returned.
 *
 * numOfFilled is not used by this function.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFill = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t generated = (int32_t)taosGenerateDataBlock(pFill, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* the start position according to the offset value has been reached, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFill->numOfRows, generated);
      return generated;
    }

    if (pQuery->limit.offset >= generated) {
      // the complete block is skipped by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFill->numOfRows, generated, pQuery->limit.offset, 0,
          pQuery->limit.offset - generated);

      pQuery->limit.offset -= generated;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    // only the leading part of the block is skipped
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFill->numOfRows, generated, pQuery->limit.offset, generated - pQuery->limit.offset, 0);

    generated -= (int32_t)pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    // todo refactor move to the beginning of buffer
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      char *base = pDst[col]->data;
      memmove(base, base + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
              generated * pQuery->pSelectExpr[col].bytes);
    }

    pQuery->limit.offset = 0;
    return generated;
  }
}

4240
/*
 * Add the first stage merge time to the elapsed time of the query and write the cost summary
 * to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // the merge time is part of the total elapsed time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: internal size:%"PRId64"B, numOfWin:%"PRId64, pQInfo, pCost->internalSupSize,
      pCost->numOfTimeWindows);
}

4256 4257
/*
 * Consume the remaining offset inside the given data block.
 *
 * If the offset equals the number of rows, the complete block is skipped and lastKey is moved one step
 * beyond the block. Otherwise the scan position is placed on the first row after the offset, lastKey is
 * set to the timestamp of that row, the offset is cleared, and the functions are applied on the block.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the current block is skipped completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;
    pCurrent->lastKey = edge + step;
    pQuery->limit.offset = 0;
    return;
  }

  // locate the first row that is not skipped
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pBlock, 0);  // column 0 is read as the timestamp column
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // the offset is consumed, the query continues from the located row
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4291

4292 4293 4294 4295 4296
/*
 * Skip data according to the offset of the query.
 *
 * Nothing is done if there is no offset or if column filters exist. Otherwise blocks that are smaller
 * than the remaining offset are skipped as a whole; the first block that is not is handed to
 * updateOffsetVal() and the iteration stops. The function leaves through longjmp() if the query was
 * killed or if terrno is set after the iteration.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo info = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &info);

    // the start position is located in the current block
    if (!(pQuery->limit.offset > info.rows)) {
      updateOffsetVal(pRuntimeEnv, &info);
      break;
    }

    // the whole block is skipped
    pQuery->limit.offset -= info.rows;

    TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? info.window.ekey : info.window.skey;
    pCurrent->lastKey = edge + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), info.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4330

H
Haojun Liao 已提交
4331
/*
 * Skip the first pQuery->limit.offset time windows of an interval query.
 *
 * "*start" is initialized with the last key of the current table and updated when the start position
 * of the query is moved. Nothing is skipped if there is no offset, or if column filters, a timestamp
 * buffer or fill info exist. The function always returns true; it leaves through longjmp() if terrno
 * is set after the block iteration.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward the query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * For an interval query without interpolation one time window is forwarded at a time, for
   * pQuery->limit.offset times. Since holes may exist, interval * offset is not a valid shortcut.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow aligned = TSWINDOW_INITIALIZER;

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;

  SDataBlockInfo block = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &block);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, block.window.ekey, pQuery->window.ekey, block.window.ekey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, block.window.skey, block.window.skey, pQuery->window.ekey, &aligned);
      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow cur = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // the current window ends inside (or before the end of) this block: it is consumed by the offset
      bool curDone = QUERY_IS_ASC_QUERY(pQuery) ? (cur.ekey <= block.window.ekey) : (cur.ekey >= block.window.skey);
      if (curDone) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = cur.skey;
      }

      STimeWindow next = cur;
      GET_NEXT_TIMEWINDOW(pQuery, &next);

      // does the next time window still overlap the current data block?
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (next.skey <= block.window.ekey) : (next.ekey >= block.window.skey);

      if (pQuery->limit.offset == 0) {
        if (!nextInBlock) {
          // the next window starts in a following block, only move the start of the query
          *start = next.skey;
          pQuery->window.skey = next.skey;
          pWinInfo->prevSKey = next.skey;
          return true;
        }

        // load the data block and check the data remaining in the current data block
        // TODO optimize performance
        SArray          *pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pTsCol = taosArrayGet(pBlock, 0);

        next = cur;
        int32_t startPos =
            getNextQualifiedWindow(pRuntimeEnv, &next, &block, pTsCol->pData, binarySearchForKey, -1);
        assert(startPos >= 0);

        // set the abort info
        pQuery->pos = startPos;

        // reset the query start timestamp
        pCurrent->win.skey = ((TSKEY *)pTsCol->pData)[startPos];
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pWinInfo->prevSKey = next.skey;
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &block, NULL, binarySearchForKey, pBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), block.window.skey, block.window.ekey, block.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      // offset is not 0, and the next time window begins or ends in the next block
      if (!nextInBlock) {
        break;
      }

      /*
       * The next time window still starts from the current data block: load the primary timestamp
       * column and find the start position of the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: optimize this case. Not all data blocks need to be loaded, only the one where the first
       * actually required time window resides.
       */
      SArray          *pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pBlock, 0);

      next = cur;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &next, &block, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      pCurrent->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pWinInfo->prevSKey = next.skey;
      cur = next;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4457 4458
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4459
/*
 * Create the TSDB query handle of the query and store it in pRuntimeEnv->pQueryHandle.
 *
 * No handle is created for tag-only queries, nor for super table queries that are neither interval
 * queries nor fixed-output queries. The kind of handle depends on the query: last-row, rows in an
 * external window (point interpolation), or a regular table scan.
 *
 * Returns TSDB_CODE_SUCCESS when no handle is needed, otherwise the value of terrno after the handle
 * has been requested.
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // a plain ascending scan of one single table uses the time window of that table
  bool singleTableScan = !isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pEnv));

  if (singleTableScan) {
    SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pTable = taosArrayGetP(pGroup, 0);
    cond.twindow = pTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (isPointInterpoQuery(pQuery) && !isFirstLastRowQuery(pQuery)) {
    pEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return terrno;
  }

  if (!isFirstLastRowQuery(pQuery)) {
    pEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return terrno;
  }

  pEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the updated window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTable = taosArrayGetP(pGroup, t);

      pTable->win = pQuery->window;
      pTable->lastKey = pTable->win.skey;
    }
  }

  return terrno;
}

4523 4524 4525
/*
 * Allocate and populate one SFillColInfo per output column of the query.
 *
 * Bytes, type and function id are taken from the select expressions, the fill value from
 * pQuery->fillVal, and the column offsets are the running sum of the column widths.
 *
 * Returns the allocated array (numOfOutput entries), or NULL if the allocation fails.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;

  SFillColInfo* pCols = calloc(numOfCols, sizeof(SFillColInfo));
  if (pCols == NULL) {
    return NULL;
  }

  int32_t rowOffset = 0;
  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo    *pExpr = &pQuery->pSelectExpr[i];
    SFillColInfo *pCol = &pCols[i];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->flag       = TSDB_COL_NORMAL;  // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    rowOffset += pExpr->bytes;
  }

  return pCols;
}

4548
/*
 * Initialize the runtime environment of a query.
 *
 * Steps, in order:
 *  1. derive the top/bottom and tag-output flags, and limit the scan by the result buffer;
 *  2. create the TSDB query handle (setupQueryHandle);
 *  3. fill in the basic fields of the runtime environment and set the traverse order of the
 *     timestamp buffer, if one is supplied;
 *  4. create the function contexts (setupQueryRuntimeEnv);
 *  5. create the disk based result buffer and the window result info, for super table queries that do
 *     not only query tags, and for group-by-column/interval queries on normal tables;
 *  6. create the fill info for fill queries other than point interpolation.
 *
 * @param pQInfo         query info object to initialize
 * @param pTsBuf         timestamp buffer, may be NULL
 * @param tsdb           TSDB instance handed to the query handle
 * @param vgId           vnode group id stored in pQInfo
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS on success, otherwise the error code of the failed step. On failure the
 *         objects created by the previous steps are not released here.
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the timestamp buffer forward when its order matches the order of the query
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;  // size limit passed to the result buffer

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // taosCreateFillColInfo() returns NULL when the allocation fails; do not hand NULL to the fill info
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    if (pColInfo == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    // align the start of the fill range using the normalized query window
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4654
/*
 * Clear the "complete" flag on the result info of every output column, so the
 * function contexts may be executed again (used before moving on to the next
 * table/group).
 *
 * @param pRuntimeEnv  runtime environment whose pCtx[] result infos are reset
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pInfo == NULL) {
      continue;  // no result info attached to this output column
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681
/*
 * Prepare the execution environment for one data block of one table.
 *
 * - interval query: the interval query range is set from the start key of the
 *   block; when tag results are required or a ts buffer is present, the
 *   additional per-table info is set as well.
 * - otherwise: the execution context of the table's group is set, using the
 *   block end key advanced by one step in the scan direction.
 *
 * @param pQInfo           query info object
 * @param pTableQueryInfo  query info of the table that owns the block
 * @param pBlockInfo       description of the data block about to be processed
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    if (pEnv->hasTagResults || pEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  // not an interval query
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4682
/*
 * Walk through all data blocks delivered by the active query handle (the
 * primary handle during the master scan, the secondary handle otherwise) and
 * apply the query functions on each of them.
 *
 * The loop stops early when the table of a block cannot be found in the
 * table-query-info map or when loading a block fails. If the query has been
 * killed, or terrno is set once the loop is done, control leaves this function
 * through longjmp() on the runtime environment.
 *
 * @param pQInfo  query info object
 * @return        elapsed time of the scan (difference of two taosGetTimestampMs() samples)
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // locate the per-table query info by the table id of the current block
    STableQueryInfo **pTableQueryInfo = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (pTableQueryInfo == NULL) {
      break;
    }

    pQuery->current = *pTableQueryInfo;

    // sanity check: the table window must lie inside the query window and follow the scan order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      assert(
          ((*pTableQueryInfo)->win.skey <= (*pTableQueryInfo)->win.ekey) &&
          ((*pTableQueryInfo)->lastKey >= (*pTableQueryInfo)->win.skey) &&
          ((*pTableQueryInfo)->win.skey >= pQuery->window.skey && (*pTableQueryInfo)->win.ekey <= pQuery->window.ekey));
    } else {
      assert(
          ((*pTableQueryInfo)->win.skey >= (*pTableQueryInfo)->win.ekey) &&
          ((*pTableQueryInfo)->lastKey <= (*pTableQueryInfo)->win.skey) &&
          ((*pTableQueryInfo)->win.skey <= pQuery->window.skey && (*pTableQueryInfo)->win.ekey >= pQuery->window.ekey));
    }

    // group-by-normal-column queries do not set up the per-block environment here
    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *pTableQueryInfo, &blockInfo);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pBlockStatis = NULL;
    SArray      *pBlockCols = NULL;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                         &pBlockStatis, &pBlockCols, &blockStatus);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // the block is skipped entirely: move lastKey one step past the block boundary
      TSKEY boundary;
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        boundary = blockInfo.window.ekey;
      } else {
        boundary = blockInfo.window.skey;
      }

      pQuery->current->lastKey = boundary + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pBlockStatis, pBlockCols, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  // an error recorded in terrno aborts the query
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4757 4758
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4759
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4760

4761
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
4762
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
4763
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
4764

H
Haojun Liao 已提交
4765 4766 4767
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
4768

H
Haojun Liao 已提交
4769
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
4770
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
4771
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
4772

4773
  STsdbQueryCond cond = {
4774
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
4775 4776
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
4777
      .numOfCols = pQuery->numOfCols,
4778
  };
4779

H
hjxilinx 已提交
4780
  // todo refactor
4781
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
4782 4783 4784 4785
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
4786

4787
  taosArrayPush(g1, &tx);
4788
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
4789

4790
  // include only current table
4791 4792 4793 4794
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
4795

H
Haojun Liao 已提交
4796
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
4797 4798
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
4799 4800 4801
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
4802

4803
  if (pRuntimeEnv->pTSBuf != NULL) {
H
Haojun Liao 已提交
4804 4805
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

4806
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
4807
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4808
      // failed to find data with the specified tag value and vnodeId
4809
      if (elem.vnode < 0) {
H
Haojun Liao 已提交
4810 4811 4812 4813 4814 4815
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

4816
        return false;
H
Haojun Liao 已提交
4817 4818
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4819 4820 4821 4822 4823 4824 4825 4826

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
4827 4828
      }
    } else {
H
Haojun Liao 已提交
4829
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4830
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
4831

H
Haojun Liao 已提交
4832
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4833
        // failed to find data with the specified tag value and vnodeId
H
Haojun Liao 已提交
4834
        if (elem1.vnode < 0) {
H
Haojun Liao 已提交
4835 4836 4837 4838 4839
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }
H
Haojun Liao 已提交
4840

H
Haojun Liao 已提交
4841
          return false;
H
Haojun Liao 已提交
4842 4843
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4844 4845 4846 4847 4848
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
H
Haojun Liao 已提交
4849
        }
H
Haojun Liao 已提交
4850

H
Haojun Liao 已提交
4851 4852
      } else {
        tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
H
Haojun Liao 已提交
4853
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
H
Haojun Liao 已提交
4854 4855 4856 4857 4858
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
H
Haojun Liao 已提交
4859
      }
4860 4861
    }
  }
4862

4863
  initCtxOutputBuf(pRuntimeEnv);
4864 4865 4866 4867 4868 4869 4870 4871 4872 4873
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4874
static void sequentialTableProcess(SQInfo *pQInfo) {
4875
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4876
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4877
  setQueryStatus(pQuery, QUERY_COMPLETED);
4878

H
Haojun Liao 已提交
4879
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4880

H
Haojun Liao 已提交
4881
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4882 4883
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4884

4885
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4886
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4887

S
TD-1057  
Shengliang Guan 已提交
4888
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4889
             numOfGroups, group);
H
Haojun Liao 已提交
4890 4891 4892 4893 4894 4895 4896

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4897 4898
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4899 4900 4901
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
4902

H
Haojun Liao 已提交
4903 4904 4905 4906 4907 4908 4909
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
4910

4911
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4912
        assert(0);  // last_row query switch to other routine to handle
H
Haojun Liao 已提交
4913
      } else {
H
Haojun Liao 已提交
4914
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4915
      }
B
Bomin Zhang 已提交
4916 4917 4918 4919 4920 4921

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4922

H
Haojun Liao 已提交
4923
      initCtxOutputBuf(pRuntimeEnv);
4924

4925
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4926
      assert(taosArrayGetSize(s) >= 1);
4927

4928
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4929 4930 4931
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4932

dengyihao's avatar
dengyihao 已提交
4933
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4934

H
Haojun Liao 已提交
4935
      // here we simply set the first table as current table
4936 4937 4938
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4939
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4940

H
Haojun Liao 已提交
4941 4942 4943 4944 4945
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
4946

H
Haojun Liao 已提交
4947 4948 4949 4950 4951
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4952 4953 4954 4955 4956 4957

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4958
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4959
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4960
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4961

S
TD-1057  
Shengliang Guan 已提交
4962
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4963 4964 4965 4966 4967 4968 4969

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4970 4971
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4984
      // no need to update the lastkey for each table
4985
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4986

B
Bomin Zhang 已提交
4987 4988
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4989 4990 4991
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4992

4993
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4994 4995
      assert(taosArrayGetSize(s) >= 1);

4996
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4997 4998 4999 5000 5001 5002 5003 5004

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
5005
      taosArrayDestroy(s);
5006 5007 5008 5009 5010
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5011
        pWindowResInfo->pResult[i].closed = true; // enable return all results for group by normal columns
5012 5013 5014

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5015
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes));
5016 5017 5018
        }
      }

5019
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5020 5021 5022 5023 5024 5025 5026
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5027
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5028 5029 5030 5031 5032 5033

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
5034 5035 5036
    }
  } else {
    /*
5037
     * 1. super table projection query, 2. ts-comp query
5038 5039 5040
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
5041
    if (pQInfo->groupIndex > 0) {
5042
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5043
      pQuery->rec.total += pQuery->rec.rows;
5044

5045
      if (pQuery->rec.rows > 0) {
5046 5047 5048
        return;
      }
    }
5049

5050
    // all data have returned already
5051
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5052 5053
      return;
    }
5054

5055 5056
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5057

H
Haojun Liao 已提交
5058
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5059 5060
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5061

5062
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5063
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5064
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5065
      }
5066

5067
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5068
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5069
        pQInfo->tableIndex++;
5070 5071
        continue;
      }
5072

H
hjxilinx 已提交
5073
      // TODO handle the limit offset problem
5074
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5075 5076
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5077 5078 5079
          continue;
        }
      }
5080

5081
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5082
      skipResults(pRuntimeEnv);
5083

5084
      // the limitation of output result is reached, set the query completed
5085
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5086
        SET_STABLE_QUERY_OVER(pQInfo);
5087 5088
        break;
      }
5089

5090 5091
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5092

5093
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5094 5095 5096 5097 5098 5099
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5100
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
5101

H
Haojun Liao 已提交
5102
        STableIdInfo tidInfo = {0};
5103

H
Haojun Liao 已提交
5104 5105 5106
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
5107
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
5108 5109
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

5110
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5111
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5112 5113
          break;
        }
5114

H
Haojun Liao 已提交
5115 5116 5117 5118
        if (pRuntimeEnv->pTSBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
        }

5119
      } else {
5120
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5121 5122
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5123 5124
          continue;
        } else {
5125 5126 5127
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5128 5129 5130
        }
      }
    }
H
Haojun Liao 已提交
5131

5132
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5133 5134
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5135
  }
5136

5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
5149
    finalizeQueryResult(pRuntimeEnv);
5150
  }
5151

5152 5153 5154
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
5155

5156
  qDebug(
S
TD-1530  
Shengliang Guan 已提交
5157 5158
      "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64,
      pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
5159
      pQuery->limit.offset);
5160 5161
}

5162 5163 5164 5165
/*
 * Switch the runtime environment from the master scan to the reverse scan:
 * the scan flag is set to reverse, the query window is swapped, the scan order
 * of the query (and of the ts buffer cursor, if any) is switched, and a new
 * secondary query handle over all table groups is created with the reversed
 * window/order.
 *
 * If the secondary query handle cannot be created, control leaves this
 * function through longjmp() with terrno.
 *
 * @param pQInfo  query info object
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  // the condition is built after the switch, so it carries the reversed order and window
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // clean unused handle; reset the pointer so that no stale handle is left behind
  // until the new one is assigned below (same pattern as used for pQueryHandle)
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5199 5200 5201 5202
/*
 * Leave the reverse scan: swap the query window back, switch the order of the
 * ts buffer cursor (if a ts buffer exists) and of the function contexts, and
 * set the scan flag to master scan again.
 *
 * NOTE(review): doSaveContext() also switches pQuery->order.order, which is not
 * switched back here - confirm that this is intentional.
 *
 * @param pQInfo  query info object
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  // undo the window swap
  SWAP(pQry->window.skey, pQry->window.ekey, TSKEY);

  if (NULL != pEnv->pTSBuf) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5213 5214 5215
/*
 * Close all time windows once a scan has finished.
 *
 * - interval query: the window results of every table in every table group
 *   are closed.
 * - otherwise: only the window results kept in the runtime environment (the
 *   group results) are closed.
 *
 * @param pQInfo  query info object
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tableList = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(tableList);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(tableList, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/*
 * Abort the multi-table query through longjmp() if an error code has been
 * recorded in pQInfo or the query has been killed; otherwise do nothing.
 */
static void abortMultiTableQueryIfFailed(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_SUCCESS && !IS_QUERY_KILLED(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
  longjmp(pQInfo->runtimeEnv.env, TSDB_CODE_TSC_QUERY_CANCELLED);
}

/*
 * Process a query over multiple tables in one pass: master scan over all data
 * blocks, optional reverse scan, then copy of the results into the output
 * buffer.
 *
 * When pQInfo->groupIndex is already larger than 0 the scan has been done by a
 * previous invocation, and only the remaining results are copied into the
 * output buffer.
 *
 * @param pQInfo  query info object
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * if the groupIndex > 0, the query process must have been completed already,
     * we only need to copy the data into output buffer
     */
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // do check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  abortMultiTableQueryIfFailed(pQInfo);

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // TODO finalizeQueryResult may cause SEGSEV, since the memory may not allocated yet, add a cleanup function instead
  // (no finalizer is invoked before aborting here)
  abortMultiTableQueryIfFailed(pQInfo);

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !isSumAvgRateQuery(pQuery)) {
    // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5312
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5313
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5314

H
hjxilinx 已提交
5315
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5316
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5317 5318
    return;
  }
5319

H
hjxilinx 已提交
5320
  pQuery->current = pTableInfo;  // set current query table info
5321

5322
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5323
  finalizeQueryResult(pRuntimeEnv);
5324

H
Haojun Liao 已提交
5325
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
5326
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5327
  }
5328

H
Haojun Liao 已提交
5329
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
5330
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
5331

5332
  skipResults(pRuntimeEnv);
5333
  limitResults(pRuntimeEnv);
5334 5335
}

H
hjxilinx 已提交
5336
/*
 * Execute a multiple-row-output query on one table.
 *
 * The table is scanned repeatedly until either some rows are produced or the query status
 * reports QUERY_COMPLETED. After limit handling, a completed query appends the table's
 * uid/tid together with its lastKey to pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  pQ->current = pTableInfo;

  // the output buffer of a ts_comp query must not be re-initialized
  if (!isTSCompQuery(pQ)) {
    resetCtxOutputBuf(pEnv);
  }

  // without filter conditions, blocks can be skipped without loading their data from file
  skipBlocks(pEnv);
  if (pQ->numOfFilterCols == 0 && pQ->limit.offset > 0) {
    setQueryStatus(pQ, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQ->current->lastKey);
    finalizeQueryResult(pEnv);

    pQ->rec.rows = getNumOfResult(pEnv);

    // the offset still has to be consumed from produced rows when filters are present
    bool skipProduced = (pQ->limit.offset > 0) && (pQ->numOfFilterCols > 0) && (pQ->rec.rows > 0);
    if (skipProduced) {
      skipResults(pEnv);
    }

    /*
     * Stop as soon as there is something to return or nothing more to scan:
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQ->rec.rows > 0 || Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQ->limit.offset, pQ->current->lastKey, pQ->current->win.ekey);

    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQ->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQ->current->lastKey, pQ->window.ekey);
  } else if (Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED)) {
    // record the id of the finished table together with the last key it reached
    STableId*    pTableId = TSDB_TABLEID(pQ->current->pTable);
    STableIdInfo tidInfo;

    tidInfo.uid = pTableId->uid;
    tidInfo.tid = pTableId->tid;
    tidInfo.key = pQ->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQ)) {
    assert(pQ->rec.rows <= pQ->rec.capacity);
  }
}

H
Haojun Liao 已提交
5396
/*
 * Scan the current table starting at `start` until the query status reports
 * QUERY_COMPLETED or QUERY_RESBUF_FULL, finalizing the results after every scan.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQ = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQ->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // Without interpolation, closed time windows can be dropped directly to consume the offset.
    // todo handle offset, in case of top/bottom interval query
    bool filteredOrTsJoin = (pQ->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    if (filteredOrTsJoin && pQ->limit.offset > 0 && pQ->fillType == TSDB_FILL_NONE) {
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // never drop more windows than the remaining offset
      int32_t numToDrop = (int32_t)(MIN(numOfClosed, pQ->limit.offset));
      clearFirstNTimeWindow(pRuntimeEnv, numToDrop);
      pQ->limit.offset -= numToDrop;
    }
  } while (!Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5423
// handle time interval query on table
H
hjxilinx 已提交
5424
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5425 5426
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5427 5428
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5429

H
Haojun Liao 已提交
5430
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5431
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
5432

5433
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5434
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5435
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5436 5437 5438 5439
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5440
  while (1) {
H
Haojun Liao 已提交
5441
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5442

H
Haojun Liao 已提交
5443
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5444
      pQInfo->groupIndex = 0;  // always start from 0
5445
      pQuery->rec.rows = 0;
5446
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5447

5448
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5449
    }
5450

5451
    // the offset is handled at prepare stage if no interpolation involved
5452
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5453
      limitResults(pRuntimeEnv);
5454 5455
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5456
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5457
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5458
      numOfFilled = 0;
5459

H
Haojun Liao 已提交
5460
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5461
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5462
        limitResults(pRuntimeEnv);
5463 5464
        break;
      }
5465

5466
      // no result generated yet, continue retrieve data
5467
      pQuery->rec.rows = 0;
5468 5469
    }
  }
5470

5471
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5472
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5473
    pQInfo->groupIndex = 0;
5474
    pQuery->rec.rows = 0;
5475
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5476
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5477 5478 5479
  }
}

5480 5481 5482 5483
/*
 * Entry point for a query on exactly one table.
 *
 * Results left over from a previous round (pending fill output or buffered window results)
 * are returned first. Otherwise the query is dispatched to the interval, fixed-output or
 * multi-output processor and the elapsed time is added to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  if (queryHasRemainResForTableQuery(pEnv)) {
    if (pQ->fillType != TSDB_FILL_NONE) {
      /*
       * Interpolation left results that have not been returned yet, so keep producing
       * them here instead of launching the retrieve procedure for the next results.
       */
      int32_t numOfFilled = 0;
      pQ->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQ->sdata, &numOfFilled);

      if (pQ->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQ->rec.rows, pQ->rec.total);
      return;
    }

    pQ->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQ->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQ->rec.rows, pQ->rec.total);

        // no buffered window result remains
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQ->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQ->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray*          pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pTable = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQ) || pEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {  // diff/add/multiply/subtract/division
    assert(pQ->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5545
/*
 * Entry point for a query over multiple tables: picks either the multi-table processor or
 * the sequential per-table processor and adds the elapsed time to the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  pQ->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  // interval queries, and fixed-output queries that are neither point interpolation nor
  // group-by-normal-column, go through the multi-table processor
  bool useMultiTableProcess = QUERY_IS_INTERVAL_QUERY(pQ);
  if (!useMultiTableProcess) {
    useMultiTableProcess =
        isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQ)) && (!pEnv->groupbyNormalCol);
  }

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQ->checkBuffer == 1 && pQ->interval.interval == 0) || isPointInterpoQuery(pQ) ||
            isFirstLastRowQuery(pQ) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5566
/*
 * Locate the source column referenced by an expression.
 *
 * Tag columns are searched in pTagCols (numOfTags entries), ordinary columns in
 * pQueryMsg->colList (numOfCols entries). The table-name pseudo column and user-defined
 * columns yield TSDB_TBNAME_COLUMN_INDEX and TSDB_UD_COLUMN_INDEX respectively.
 *
 * @return the index of the matching column or one of the special indexes above; if nothing
 *         matches, assert(0) fires and -1 is returned when assertions are disabled.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int16_t wantedId = pExprMsg->colInfo.colId;

  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (wantedId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t idx = 0; idx < pQueryMsg->numOfTags; ++idx) {
      if (pTagCols[idx].colId == wantedId) {
        return idx;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t idx = 0; idx < pQueryMsg->numOfCols; ++idx) {
      if (pQueryMsg->colList[idx].colId == wantedId) {
        return idx;
      }
    }
  }

  // the referenced column does not exist in the message
  assert(0);
  return -1;
}

5597 5598 5599
/*
 * Check that the column index resolved for an expression is below numOfCols or below
 * numOfTags.
 * NOTE(review): the special indexes returned by getColumnIndexInSource() (for example -1)
 * also pass this check whenever either count is positive -- confirm this is intended.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t idx = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (idx < pQueryMsg->numOfCols) {
    return true;
  }

  return idx < pQueryMsg->numOfTags;
}

5602
/*
 * Sanity-check the scalar counters of a query message.
 * The checks run in a fixed order and the first failing one is logged.
 *
 * @return true if interval, numOfTables, numOfGroupCols and numOfOutput are all in range
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    return true;
  }

  // one of the checks above failed and has been logged
  return false;
}

/*
 * Validate the number of source columns and tags of a query message.
 *
 * A message with no source column and no tag is accepted only if every output expression is
 * TSDB_FUNC_TAGPRJ, or is TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT applied to the table-name column.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  bool illegalCounts = pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS;
  if (illegalCounts) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only expressions that need none are acceptable
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFunc = pExprMsg[i];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                   (pFunc->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                   (pFunc->functionId == TSDB_FUNC_COUNT && onTbname);
    if (!allowed) {
      return false;
    }
  }

  return true;
}

5648
/*
 * Build the table id list from the numOfTables STableIdInfo records stored at pMsg.
 * Each record is byte-swapped in place (tid with htonl, uid and key with htobe64) and then
 * appended to a newly created array stored in *pTableIdList.
 *
 * @return pointer to the first byte after the consumed records
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pRecords = (STableIdInfo *)pMsg;
  int32_t       numOfDone = 0;

  for (; numOfDone < pQueryMsg->numOfTables; ++numOfDone) {
    STableIdInfo *pRecord = &pRecords[numOfDone];

    pRecord->tid = htonl(pRecord->tid);
    pRecord->uid = htobe64(pRecord->uid);
    pRecord->key = htobe64(pRecord->key);

    taosArrayPush(*pTableIdList, pRecord);
  }

  return (char *)&pRecords[numOfDone];
}
5666

5667
/**
H
hjxilinx 已提交
5668
 * pQueryMsg->head has been converted before this function is called.
5669
 *
H
hjxilinx 已提交
5670
 * @param pQueryMsg
5671 5672 5673 5674
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5675
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5676
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5677 5678
  int32_t code = TSDB_CODE_SUCCESS;

5679 5680 5681 5682
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5683 5684 5685 5686 5687 5688
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
  pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit;
  pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit;
  pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit;
5689 5690
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5691

5692 5693
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5694
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5695
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5696 5697

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5698
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5699
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5700 5701 5702
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5703
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5704
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5705
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5706

5707
  // query msg safety check
5708
  if (!validateQueryMsg(pQueryMsg)) {
5709 5710
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5711 5712
  }

H
hjxilinx 已提交
5713 5714
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5715 5716
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5717
    pColInfo->colId = htons(pColInfo->colId);
5718
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5719 5720
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5721

H
hjxilinx 已提交
5722
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5723

H
hjxilinx 已提交
5724
    int32_t numOfFilters = pColInfo->numOfFilters;
5725
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5726
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
5727 5728 5729 5730
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
5731 5732 5733
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5734
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
5735

5736 5737
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5738 5739 5740

      pMsg += sizeof(SColumnFilterInfo);

5741 5742
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5743

5744
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
5745 5746 5747 5748 5749
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

5750
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
5751
        pMsg += (pColFilter->len + 1);
5752
      } else {
5753 5754
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5755 5756
      }

5757 5758
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5759 5760 5761
    }
  }

5762
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
5763 5764 5765 5766 5767
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

5768
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5769

5770
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5771
    (*pExpr)[i] = pExprMsg;
5772

5773
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5774 5775 5776 5777
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5778

5779
    pMsg += sizeof(SSqlFuncMsg);
5780 5781

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5782
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5783 5784 5785 5786
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5787
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5788 5789 5790 5791 5792
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5793 5794
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
5795
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
5796 5797
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5798 5799
      }
    } else {
5800
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5801
//        return TSDB_CODE_QRY_INVALID_MSG;
5802
//      }
5803 5804
    }

5805
    pExprMsg = (SSqlFuncMsg *)pMsg;
5806
  }
5807

5808
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5809
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5810
    goto _cleanup;
5811
  }
5812

H
hjxilinx 已提交
5813
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5814

H
hjxilinx 已提交
5815
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5816
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5817 5818 5819 5820
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5821 5822 5823

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5824
      pMsg += sizeof((*groupbyCols)[i].colId);
5825 5826

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5827 5828
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5829
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5830 5831 5832 5833 5834
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5835

H
hjxilinx 已提交
5836 5837
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5838 5839
  }

5840 5841
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5842
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5843 5844

    int64_t *v = (int64_t *)pMsg;
5845
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5846 5847
      v[i] = htobe64(v[i]);
    }
5848

5849
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5850
  }
5851

5852 5853
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5854 5855 5856 5857 5858
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

5859 5860
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5861

5862 5863 5864 5865
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5866

5867
      (*tagCols)[i] = *pTagCol;
5868
      pMsg += sizeof(SColumnInfo);
5869
    }
H
hjxilinx 已提交
5870
  }
5871

5872 5873 5874
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
5875 5876 5877 5878 5879 5880

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
5881 5882 5883
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5884

weixin_48148422's avatar
weixin_48148422 已提交
5885
  if (*pMsg != 0) {
5886
    size_t len = strlen(pMsg) + 1;
5887

5888
    *tbnameCond = malloc(len);
5889 5890 5891 5892 5893
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5894
    strcpy(*tbnameCond, pMsg);
5895
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5896
  }
5897

5898
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5899 5900
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5901
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
5902
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5903 5904

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5905 5906

_cleanup:
S
Shengliang Guan 已提交
5907
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5908 5909
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5910 5911 5912 5913
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5914 5915

  return code;
5916 5917
}

H
hjxilinx 已提交
5918
/*
 * Build the expression tree of an arithmetic expression from the binary form carried in
 * the first argument of pArithExprInfo->base and store it in pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS; the code caught from exprTreeFromBinary() if it raises one;
 *         TSDB_CODE_QRY_APP_ERROR if no tree was produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  // read once before the TRY block and never modified afterwards
  char *exprBinary = pArithExprInfo->base.arg[0].argValue.pz;

  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, exprBinary);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(exprBinary, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, exprBinary, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, exprBinary);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5939
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5940 5941
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5942
  int32_t code = TSDB_CODE_SUCCESS;
5943

H
Haojun Liao 已提交
5944
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5945
  if (pExprs == NULL) {
5946
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5947 5948 5949 5950 5951
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5952
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5953
    pExprs[i].base = *pExprMsg[i];
5954
    pExprs[i].bytes = 0;
5955 5956 5957 5958

    int16_t type = 0;
    int16_t bytes = 0;

5959
    // parse the arithmetic expression
5960
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5961
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5962

5963
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5964
        taosTFree(pExprs);
5965
        return code;
5966 5967
      }

5968
      type  = TSDB_DATA_TYPE_DOUBLE;
5969
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5970
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5971
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5972
      type = s.type;
H
Haojun Liao 已提交
5973
      bytes = s.bytes;
5974 5975
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
5976 5977
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

5978 5979
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
5980 5981 5982 5983 5984

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5985
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5986
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5987

dengyihao's avatar
dengyihao 已提交
5988
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5989 5990 5991 5992
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5993
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5994

H
Haojun Liao 已提交
5995 5996 5997
        type  = s.type;
        bytes = s.bytes;
      }
5998 5999
    }

S
TD-1057  
Shengliang Guan 已提交
6000
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6001
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6002
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
6003
      taosTFree(pExprs);
6004
      return TSDB_CODE_QRY_INVALID_MSG;
6005 6006
    }

6007
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6008
      tagLen += pExprs[i].bytes;
6009
    }
6010
    assert(isValidDataType(pExprs[i].type));
6011 6012 6013
  }

  // TODO refactor
6014
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6015 6016
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6017

6018
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6019
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6020 6021 6022 6023 6024 6025 6026 6027 6028
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6029 6030 6031
    }
  }

6032
  *pExprInfo = pExprs;
6033 6034 6035
  return TSDB_CODE_SUCCESS;
}

6036
/*
 * Create the group-by expression described by the query message.
 *
 * @param pQueryMsg  query message providing numOfGroupCols, orderType and orderByIdx
 * @param pColIndex  numOfGroupCols group-by column descriptors, copied into the result
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *                   left untouched otherwise
 * @return the new expression, or NULL if the query has no group-by column or an allocation
 *         failed (callers tell the two apart through *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the allocation above
    taosTFree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6060
/**
 * Build pQuery->pFilterInfo from the per-column filters stored in pQuery->colList.
 *
 * One SSingleColumnFilterInfo is created for every column that has at least one filter, and
 * each of its filter elements is bound to a comparison callback picked from the range table
 * (lower AND upper bound present) or the value table (single relation).
 *
 * @param pQInfo  only used as an identifier in log messages
 * @param pQuery  query whose colList is read and whose numOfFilterCols/pFilterInfo are filled
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG.
 *         On failure the partially built filter info is left in pQuery for the owner to release.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array so that cleanup code does not walk NULL
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];
    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: reject the message instead of indexing a missing table
        if (rangeFilterArray == NULL) {
          qError("QInfo:%p failed to get range filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        // range table slots: 1 -> (>, <), 2 -> (>=, <), 3 -> (>, <=), 4 -> (>=, <=)
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        if (filterArray == NULL) {
          qError("QInfo:%p failed to get value filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          // a single-relation filter must not carry an upper relation as well
          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6153
/**
 * Resolve colInfo.colIndex of every output expression to the position of the matching
 * column id inside pQuery->colList (normal columns) or pQuery->tagColList (tag columns).
 *
 * Arithmetic expressions and user-defined constant columns (colId <= TSDB_UD_COLUMN_INDEX)
 * are left untouched.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column referenced by an expression must be part of the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is the only id allowed to be absent from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6190 6191
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6192 6193 6194
/**
 * Choose the output buffer capacity (in rows) and the flush threshold for a query.
 *
 * Projection queries get enough rows to fill roughly 1.5 MB, but never fewer than 8192;
 * every other query uses a fixed 4096-row buffer. The threshold is 85% of the capacity.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t minMsgBytes    = 1024 * (1024 + 512);  // bytes
  const int32_t minMsgRows     = 8192;
  const float   thresholdRatio = 0.85f;

  // in case of non-prj query, a smaller output buffer will be used.
  int32_t rows = 4096;

  if (isProjQuery(pQuery)) {
    rows = minMsgBytes / pQuery->rowSize;
    if (rows < minMsgRows) {
      rows = minMsgRows;
    }
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * thresholdRatio);
}

6211 6212
/**
 * Allocate and populate a SQInfo (and its SQuery) from a decoded query message.
 *
 * Ownership: pGroupbyExpr, pExprs, pTagCols and the content of *pTableGroupInfo are taken over
 * by this function. On success they are reachable from the returned object; on every failure
 * path they are released here (directly or through freeQInfo), so the caller must not free
 * them again after the call.
 *
 * @param pQueryMsg        decoded query message
 * @param pGroupbyExpr     group-by descriptor, may be NULL
 * @param pExprs           array of pQueryMsg->numOfOutput output expressions
 * @param pTableGroupInfo  tables to query, grouped; copied by value into the new object
 * @param pTagCols         tag column descriptions
 * @param stableQuery      forwarded to changeExecuteScanOrder
 * @return the new query handle, or NULL on failure
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;

  // size the array by its own element type rather than by an unrelated structure
  pQuery->colList = calloc(numOfCols, sizeof(*pQuery->colList));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the filters get their own copy; the message's filters are not shared with the query
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    assert(pExprs[col].interBytes >= pExprs[col].bytes);

    // allocate additional memory for interResults that are usually larger then final results
    size_t size = (size_t)((pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
  }

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);

  // one contiguous slab holds the STableQueryInfo of every table in the query
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosArrayInit(0, sizeof(STableIdInfo));
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  STimeWindow window = pQuery->window;

  // running slot number inside pQInfo->pBuf, shared by all groups
  int32_t index = 0;

  for (size_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for (size_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);

      // every table starts scanning from its own last key
      window.skey = info->lastKey;
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = (int32_t)i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

  // pQInfo could not be allocated: the table group was never copied, release the caller's copy
_cleanup_qinfo:
  tsdbDestroyTableGroup(pTableGroupInfo);

  // pQuery could not be allocated: the inputs are not yet attached to it, release them one by one
_cleanup_query:
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  taosTFree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  taosTFree(pExprs);

  // everything attached to pQInfo/pQuery is released by freeQInfo (a NULL pQInfo is ignored there)
_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6390
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6391 6392 6393 6394
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6395

H
hjxilinx 已提交
6396 6397 6398 6399
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6400
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6401 6402 6403
  return (sig == (uint64_t)pQInfo);
}

6404
/**
 * Finish the initialisation of a freshly created query handle.
 *
 * Queries that cannot produce any row (empty time range for the scan order, or no table
 * left in the group info) are marked QUERY_COMPLETED and reported as success without any
 * further set-up.
 *
 * @param pQueryMsg  query message; the optional ts-comp block is read from it
 * @param tsdb       storage handle, used for the precision setting and passed to doInitQInfo
 * @param vgId       vnode group id
 * @param pQInfo     query handle to initialise; it is FREED here when doInitQInfo fails
 * @param isSTable   forwarded to doInitQInfo
 * @return TSDB_CODE_SUCCESS or the error code reported by doInitQInfo
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is only consumed by doInitQInfo, so it is built after the early exits above;
  // creating it earlier would leave it unreferenced whenever one of them is taken.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6448
/**
 * Release an array of column filters together with the string payload of every string filter.
 *
 * Nothing is done when pFilter is NULL or numOfFilters is 0.
 *
 * @param pFilter       filter array to release
 * @param numOfFilters  number of elements in pFilter
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    for (int32_t idx = 0; idx < numOfFilters; idx++) {
      SColumnFilterInfo *pElem = &pFilter[idx];

      // only string filters carry a heap pointer in pz
      if (pElem->filterstr) {
        free((void*)(pElem->pz));
      }
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6462 6463
/**
 * Destroy every STableQueryInfo referenced from the group list, then the per-group arrays,
 * the group list itself and the lookup map, and finally reset the structure to empty.
 *
 * @param pTableqinfoGroupInfo  group info to tear down
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pTables = taosArrayGetP(pGroups, g);
      size_t tableCount = taosArrayGetSize(pTables);

      for (int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo* pTableInfo = taosArrayGetP(pTables, t);
        destroyTableQueryInfoImpl(pTableInfo);
      }

      taosArrayDestroy(pTables);
    }
  }

  taosArrayDestroy(pGroups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  // leave no dangling references behind
  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
hjxilinx 已提交
6486 6487 6488 6489
/**
 * Release a query handle and everything it owns: the runtime environment, the SQuery with its
 * result buffers, filters, expressions, group-by descriptor, tag and column lists, the
 * per-table query info, the table group and the table id array.
 *
 * The call is a no-op for NULL or for a handle whose signature does not match its address.
 * It is also used on partially constructed handles, so every member is allowed to be unset.
 *
 * @param pQInfo  query handle to release
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        taosTFree(pQuery->sdata[col]);
      }
      taosTFree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }

    // the filter array may be missing although the column counter is set (allocation failure
    // while the filters were being built), so never index it without checking
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          taosTFree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo *pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  taosTFree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  // invalidate the handle before the memory goes away so that isValidQInfo rejects stale pointers
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6561
/**
 * Compute the number of payload bytes for the current result set.
 *
 * For a ts-comp query with at least one row the payload is the temporary file named by
 * sdata[0]->data: its size is returned and also written back to *numOfRows. For every other
 * case the size is rowSize * (*numOfRows).
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  in: number of rows; out: file size for ts-comp queries
 * @return payload size in bytes, 0 when the ts-comp file cannot be inspected
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // ordinary result: fixed-width rows
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6582

H
hjxilinx 已提交
6583 6584 6585
/**
 * Copy the current result set into the response buffer and update the row accounting.
 *
 * For a ts-comp query the payload is the temporary file named by sdata[0]->data: the whole
 * file is read into data and the file is removed afterwards. For every other query the rows
 * are copied with doCopyQueryResultToMsg. The query is marked QUERY_OVER once the limit has
 * been reached (or, for ts-comp queries, once the query is completed).
 *
 * @param pQInfo  query handle
 * @param data    destination buffer; the caller must have sized it for the full payload
 * @return always TSDB_CODE_SUCCESS; I/O failures are logged only
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the size signed so that a failing lseek (-1) is recognisable
      int64_t s = (int64_t)lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, s);
      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        // read() may legitimately return less than requested, keep going until the file is consumed
        int64_t total = 0;
        while (total < s) {
          int64_t n = (int64_t)read(fd, data + total, (size_t)(s - total));
          if (n < 0) {
            if (errno == EINTR) {
              continue;
            }
            break;
          }

          if (n == 0) {  // unexpected end of file
            break;
          }

          total += n;
        }

        if (total < s) {  // todo return the error code to client
          qError("QInfo:%p failed to read ts-comp data, path:%s, read:%" PRId64 ", expected:%" PRId64 ", reason:%s", pQInfo,
                 pQuery->sdata[0]->data, total, s, strerror(errno));
        }
      } else {
        qError("QInfo:%p failed to seek in ts-comp data file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6635 6636 6637 6638 6639 6640 6641
/* Bookkeeping for the query handles of one vnode (NOTE(review): usage is outside this excerpt, confirm). */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // vnode group id this manager belongs to
  bool            closed;         // presumably set once the manager stops accepting handles; verify against users
  pthread_mutex_t lock;           // presumably guards the members above; verify against users
} SQueryMgmt;

6642
/**
 * Decode a query message, resolve the tables it addresses and create an initialised query handle.
 *
 * @param tsdb       storage handle, must not be NULL
 * @param vgId       vnode group id
 * @param pQueryMsg  raw query message, must not be NULL; it is converted in place and the
 *                   column filters attached to it are released before returning
 * @param pQInfo     out: the new query handle on success, NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    // the table group has been built but will never be handed to a query handle: release it here
    tsdbDestroyTableGroup(&tableGroupInfo);
    goto _over;
  }

  // createQInfoImpl takes over the expressions, the group-by descriptor, the tag columns and the
  // table group whether it succeeds or not, hence the local references are dropped right away
  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // pExprs is still set only when it was never handed to createQInfoImpl; release the expression
  // trees as well as the array, the same way the cleanup in createQInfoImpl does
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }
    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // the handle has already been freed by initQInfo on failure, but *pQInfo may still hold its address
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6773
/**
 * Public destructor of a query handle: logs completion, prints the cost summary and releases
 * the handle. Invalid (NULL or already released) handles are ignored.
 *
 * @param qHandle  query handle
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);

    // print the query cost summary
    queryCostStatis(pQInfo);
    freeQInfo(pQInfo);
  }
}

6784 6785 6786 6787 6788 6789 6790 6791
/**
 * Publish the result of the current execution round: mark the data as ready, give up the
 * ownership of the handle and post the ready semaphore.
 *
 * @param pQInfo  query handle; must currently be owned by the calling thread
 * @return true when a response context was registered at the time the data became ready
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);

  // The owner has to be cleared while the lock is held: once the handle is queued as a task
  // again, another thread may start running it before this one gets any further.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  tsem_post(&pQInfo->ready);
  return hasRspContext;
}

6803
/**
 * Run one execution round of the query bound to the given handle.
 *
 * The calling thread first claims ownership of the handle with a
 * compare-and-swap on pQInfo->owner. If another thread already owns it, the
 * code is set to TSDB_CODE_QRY_IN_EXEC and false is returned. Every other
 * exit path goes through doBuildResCheck(), which releases the ownership.
 *
 * Errors raised during execution arrive through longjmp() to the jump buffer
 * armed below; the error code is stored in pQInfo->code.
 *
 * @return false if the handle is being executed by another thread, otherwise
 *         the value returned by doBuildResCheck()
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t self      = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // Arm the jump buffer: a non-zero value means an error/cancel was raised
  // while executing; record the code so it can be returned to the client.
  int32_t jmpCode = setjmp(pRuntimeEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // Dispatch to the matching executor.
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows != 0) {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  } else {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  }

  return doBuildResCheck(pQInfo);
}

6859
/**
 * Wait until the result of the current execution round is available.
 *
 * The call blocks on the handle's `ready` semaphore, so on a normal return
 * the result can always be built right away (*buildRes is true). A killed
 * query returns immediately with *buildRes set to false.
 *
 * @param qinfo        opaque query handle
 * @param buildRes     [out] whether the caller may build the response now;
 *                     left untouched when the handle is invalid
 * @param pRspContext  not used by the blocking implementation
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 *         the code currently stored in the QInfo
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // Block until the executing thread posts the semaphore.
  tsem_wait(&pQInfo->ready);
  *buildRes = true;

  return pQInfo->code;
}
6902

6903
/**
 * Allocate and fill the retrieve response for the current result set.
 *
 * The payload size is the result size reported by getResultSize(), plus one
 * int32_t, plus one STableIdInfo per entry of pQInfo->arrTableIdInfo.
 *
 * @param qinfo         opaque query handle
 * @param pRsp          [out] response allocated with rpcMallocCont()
 * @param contLen       [out] total length of the response (header + payload)
 * @param continueExec  [out] true when more results remain for the client;
 *                      not written on the early error returns
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or the
 *         code stored in the QInfo
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery      = pRuntimeEnv->pQuery;

  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payloadLen + sizeof(SRetrieveTableRsp));

  // todo: handle allocation failure properly; for now this only avoids a
  // crash and cannot deliver an error code to the client
  SRetrieveTableRsp* pMsg = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  *pRsp = pMsg;
  if (pMsg == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  const bool succeeded = (pQInfo->code == TSDB_CODE_SUCCESS);

  pMsg->numOfRows = htonl((int32_t)pQuery->rec.rows);
  if (succeeded) {
    pMsg->offset = htobe64(pQuery->limit.offset);
  } else {
    pMsg->offset = 0;
  }
  pMsg->useconds  = htobe64(pRuntimeEnv->summary.elapsedTime);
  pMsg->precision = htons(pQuery->precision);

  // Rows are copied out only for a successful query that produced data;
  // otherwise the query is flagged as over.
  if (succeeded && pQuery->rec.rows > 0) {
    doDumpQueryResult(pQInfo, pMsg->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    *continueExec = false;
    pMsg->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6956

6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967
/**
 * Tell whether the query behind the handle has finished.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise
 *         1 when the query was killed or its status has QUERY_OVER set, and
 *         0 when it has not
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;
  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pInfo)) {
    return 1;
  }

  SQuery* pQuery = pInfo->runtimeEnv.pQuery;
  return Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6968
/**
 * Mark the query as killed and wait until no thread is executing it.
 *
 * After setQueryKilled() the function polls pQInfo->owner every 100 ms; the
 * owner is cleared as soon as the executing thread stops.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 *         TSDB_CODE_SUCCESS
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;
  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pInfo);

  // Wait for the executing thread to stop: once the query has stopped, the
  // owner of the qhandle is cleared immediately.
  for (; pInfo->owner != 0; ) {
    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001
/**
 * Store one tag value into the result buffer.
 *
 * A NULL value is encoded with setVardataNull() for BINARY/NCHAR and with
 * setNull() for every other type. A non-NULL value is copied verbatim:
 * varDataTLen(val) bytes for BINARY/NCHAR, `bytes` bytes otherwise.
 *
 * @param output  destination inside the result buffer
 * @param val     tag value, or NULL
 * @param type    data type of the tag
 * @param bytes   width of the tag for fixed-length types
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    memcpy(output, val, varDataTLen(val));
  } else {  // todo (from the original author): here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7002 7003 7004
/**
 * Build the result of a query that needs only tag / table-name data.
 *
 * Nothing is done when there is no table group. Otherwise the single table
 * group is processed according to the function of the first select
 * expression:
 *
 *  - TSDB_FUNC_TID_TAG: one var-length record per table, laid out as
 *      [varstr header][int16 0][int64 uid][int32 tid][int32 vgId][value]
 *    where value is the table name or the requested tag value.
 *  - TSDB_FUNC_COUNT: a single row holding the number of tables
 *    ("count(tbname)").
 *  - anything else: one row per table with the requested tag / table-name
 *    columns, honouring limit/offset.
 *
 * pQInfo->tableIndex keeps the scan position, so each call emits at most one
 * buffer's worth of rows. On return pQuery->rec.rows holds the row count and
 * the query status is set to QUERY_COMPLETED.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;

    // Prefer the type/width recorded in the tag column list when the column
    // is found there; fall back to the expression's own type/width.
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // The header fields sit at offsets that are not naturally aligned for
      // their types, so they are serialized with memcpy instead of being
      // stored through casted pointers.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // Emit at most one buffer of rows, or fewer when the limit is smaller.
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7135
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7136 7137 7138 7139
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7140
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7141 7142
}

H
Haojun Liao 已提交
7143
/**
 * Reserve query buffer for a query touching `numOfTables` tables.
 *
 * tsQueryBufferSize < 0 means the budget is not enforced; 0 disables query
 * processing. For a positive budget the required amount is subtracted with a
 * compare-and-swap loop, failing when the remainder would become negative.
 *
 * @return TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  if (!(tsQueryBufferSize > 0)) {
    // disable query processing if the value of tsQueryBufferSize is zero.
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  for (;;) {
    int64_t avail = tsQueryBufferSize;
    int64_t left  = avail - required;
    if (left < 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry when another thread changed the budget in the meantime
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, avail, left) == avail) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

H
Haojun Liao 已提交
7166
/**
 * Give back the query buffer reserved by checkForQueryBuf() for a query
 * over `numOfTables` tables.
 *
 * Only a negative tsQueryBufferSize (budget not enforced) skips the refund.
 * A budget of exactly zero must still be refunded: checkForQueryBuf() can
 * legitimately drain the budget to 0, and skipping the refund in that state
 * would leave it stuck at 0 and reject every subsequent query.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize < 0) {
    return;
  }

  int64_t t = getQuerySupportBufSize(numOfTables);

  // restore the amount reserved for this query
  atomic_add_fetch_64(&tsQueryBufferSize, t);
}

7177 7178 7179 7180 7181 7182 7183
/**
 * Return the response context currently attached to the query handle.
 * The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* rspContext = pInfo->rspContext;
  return rspContext;
}

7184 7185 7186 7187 7188 7189 7190
/**
 * Free callback registered with the qhandle cache.
 *
 * `qhandle` points at the cached slot holding the query handle. When both
 * the slot and the handle are non-NULL the query is killed and destroyed.
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle != NULL && *ppHandle != NULL) {
    qKillQuery(*ppHandle);
    qDestroyQueryInfo(*ppHandle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7195
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7196 7197 7198 7199

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7200
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7201 7202 7203 7204
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7205

S
TD-1530  
Shengliang Guan 已提交
7206
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7207 7208 7209 7210
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7211 7212

  qDebug("vgId:%d, open querymgmt success", vgId);
7213
  return pQueryMgmt;
7214 7215
}

H
Haojun Liao 已提交
7216
/**
 * Cache-refresh callback that kills the query stored in a cache slot.
 *
 * `handle` points at the slot holding the query handle. A NULL slot pointer
 * is ignored, mirroring freeqinfoFn(); a NULL or invalid handle inside the
 * slot is rejected by qKillQuery() itself.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7222 7223 7224 7225 7226 7227 7228
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
7229
//  pthread_mutex_lock(&pQueryMgmt->lock);
7230
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
7231
//  pthread_mutex_unlock(&pQueryMgmt->lock);
7232

H
Haojun Liao 已提交
7233
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250
}

/**
 * Tear down a query manager that has already been marked as closed:
 * the handle cache is cleaned up, the lock destroyed and the manager freed.
 * A NULL manager is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  // remember the id for the final log line; the manager is gone by then
  int32_t vgId = pMgmt->vgId;
  assert(pMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  taosTFree(pMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

7256
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7257
  if (pMgmt == NULL) {
7258
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7259 7260 7261
    return NULL;
  }

7262
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
7263

7264 7265
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7266
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7267
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7268 7269 7270
    return NULL;
  }

H
Haojun Liao 已提交
7271
//  pthread_mutex_lock(&pQueryMgmt->lock);
7272
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
7273
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7274
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7275
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7276 7277
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7278 7279
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
7280
//    pthread_mutex_unlock(&pQueryMgmt->lock);
7281 7282 7283 7284 7285

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7286
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7287 7288
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7289 7290 7291 7292 7293 7294 7295
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7296 7297 7298
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7299 7300
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7301
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7302
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7303 7304 7305 7306 7307 7308
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7309
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7310 7311 7312 7313 7314
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7315
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7316 7317 7318
  return 0;
}

7319