qExecutor.c 263.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30
#include "ttype.h"
31

H
Haojun Liao 已提交
32
// evaluates to (1 << 12) - 1 = 4095
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)

/**
 * Check whether the primary column is loaded by default; otherwise, the program is
 * forced to load the primary column explicitly.
 * NOTE(review): nothing directly below obviously corresponds to this comment -- confirm it is not stale.
 */
// true when at least one bit of s is set in p
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0u)
// true when the forward-direction factor of the query order equals QUERY_ASC_FORWARD_STEP
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

// test / set the scanFlag field of a runtime environment
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

// recover the enclosing SQInfo from a pointer to its runtimeEnv member
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

// (query)->pos advanced by index steps of size step
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
// toggles n in place: TSDB_ORDER_ASC becomes TSDB_ORDER_DESC, anything else becomes TSDB_ORDER_ASC
// (n is evaluated more than once, so it must be free of side effects)
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

// compound literal used to initialize an SDataBlockInfo
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

// copies the skey and ekey fields of _src into _dst
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)
S
TD-1057  
Shengliang Guan 已提交
57

58
/* Bit flags for the query status word (tested and cleared with bitwise operations). */
enum {
  /* set when the query starts to execute */
  QUERY_NOT_COMPLETED = (1u << 0),

  /*
   * The result output buffer is full and the current query is paused.
   * Only occurs for group-by and diff/add/division/multiply style queries.
   */
  QUERY_RESBUF_FULL = (1u << 1),

  /*
   * The query is over:
   * 1. used by one-row-result queries, e.g. count/sum/first/last/avg, etc.
   * 2. also set once all data within the queried time window has been handled
   */
  QUERY_COMPLETED = (1u << 2),

  /*
   * Used when the result has not yet been completely returned to the client;
   * usually seen with interval queries that use the interpolation option.
   */
  QUERY_OVER = (1u << 3),
};
78 79

/*
 * Codes used by the timestamp-join logic.
 * NOTE(review): the names suggest the outcome of comparing timestamps/tags -- confirm at the use sites.
 */
enum {
  TS_JOIN_TS_EQUAL,        // 0
  TS_JOIN_TS_NOT_EQUALS,   // 1
  TS_JOIN_TAG_NOT_EQUALS,  // 2
};

85
/* Snapshot of a query's progress-related state. */
typedef struct {
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;          // saved cursor (presumably into the ts buffer -- confirm against STSCursor)
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
H
Haojun Liao 已提交
94
static UNUSED_FUNC void *u_malloc (size_t __size) {
H
Haojun Liao 已提交
95
  uint32_t v = rand();
H
Haojun Liao 已提交
96 97

  if (v % 1000 <= 0) {
H
Haojun Liao 已提交
98 99
    return NULL;
  } else {
H
Haojun Liao 已提交
100
    return malloc(__size);
H
Haojun Liao 已提交
101
  }
H
Haojun Liao 已提交
102 103
}

H
Haojun Liao 已提交
104 105
static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
H
Haojun Liao 已提交
106
  if (v % 1000 <= 0) {
H
Haojun Liao 已提交
107 108 109 110 111 112
    return NULL;
  } else {
    return calloc(num, __size);
  }
}

H
Haojun Liao 已提交
113 114 115 116 117 118 119 120 121
static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return realloc(p, __size);
  }
}

H
Haojun Liao 已提交
122
#define calloc  u_calloc
H
Haojun Liao 已提交
123
#define malloc  u_malloc
H
Haojun Liao 已提交
124
#define realloc u_realloc
H
Haojun Liao 已提交
125
#endif
H
Haojun Liao 已提交
126

127
// clears the bits of st in (q)->status
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
// number of entries in the table-group list of q
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
// the _index-th entry of the table-group list, as an SArray*
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))
// a query is treated as an interval query when its interval length is positive
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
H
Haojun Liao 已提交
131

132
// forward declarations of static helpers referenced before their definitions
static void setQueryStatus(SQuery *pQuery, int8_t status);
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
134

135 136 137
/* Returns twice the value of tsShellActivityTimer. */
static int32_t getMaximumIdleDurationSec() {
  const int32_t idleDuration = tsShellActivityTimer * 2;
  return idleDuration;
}
138

139 140
/*
 * Advance the time window *tw to the next window in scan direction.
 *
 * For fixed-length units the window start moves by the sliding value (times the direction
 * factor) and the end is start + interval - 1.
 *
 * For the calendar units 'n' (month) and 'y' (year, handled as 12 months) the start is
 * converted to broken-down local time, shifted by whole months, and converted back with
 * mktime(); the window then spans `interval` months and ends one tick before the next one.
 *
 * @param pQuery  supplies order, interval settings and timestamp precision
 * @param tw      in/out: current window, overwritten with the next one
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to seconds: /1000 for milliseconds, one more /1000 for microseconds
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  // months are counted from year 0 of struct tm so that the shift can cross year boundaries
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before scaling: time_t/long may be 32-bit, and seconds * 1000 would overflow there
  tw->skey = (int64_t)mktime(&tm) * 1000;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  // ekey is inclusive: one tick before the start of the following window
  tw->ekey -= 1;
}

176 177
// marks the scan as finished by moving tableIndex to the total number of tables
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
// true once tableIndex has reached (or passed) the total number of tables
// NOTE(review): "STASBLE" looks like a typo of "STABLE"; renaming would need every user updated
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
178

H
hjxilinx 已提交
179
// forward declarations of helpers referenced before their definitions
// todo move to utility
static int32_t mergeIntoGroupResultImpl(SGroupResInfo* pGroupResInfo, SArray *pTableList, SQInfo* pQInfo);

static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
static void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);

static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);

static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
static void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
static void buildTagQueryResult(SQInfo *pQInfo);

static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
static int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order);
static void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey, int32_t type);
static STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win);
static STableIdInfo createTableIdInfo(SQuery* pQuery);
202

203
/*
 * Evaluate all column filters against the row at position elemPos.
 *
 * A row passes when, for every filtered column, at least one of that column's filter
 * elements accepts the value. NULL values are accepted only by the is-null operator;
 * non-NULL values are accepted immediately by the not-null operator, are never accepted
 * by the is-null operator, and otherwise are handed to the filter callback.
 *
 * @return true when the row satisfies every column's filter set
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pVal = (char*)pColFilter->pData + pColFilter->info.bytes * elemPos;

    bool accepted = false;
    for (int32_t f = 0; f < pColFilter->numOfFilters && !accepted; ++f) {
      SColumnFilterElem *pFilter = &pColFilter->pFilters[f];

      if (isNull(pVal, pColFilter->info.type)) {
        // a NULL value can only be matched by "is null"
        accepted = (pFilter->fp == isNullOperator);
        continue;
      }

      if (pFilter->fp == notNullOperator) {
        accepted = true;
        continue;
      }

      if (pFilter->fp == isNullOperator) {
        continue;  // value is not NULL, try the next filter element
      }

      if (pFilter->fp(pFilter, pVal, pVal, pColFilter->info.type)) {
        accepted = true;
      }
    }

    if (!accepted) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes among the output functions of the query.
 *
 * When the query has a main output, the ts/tag/tagprj functions are ignored, because they
 * cannot decide the number of output rows; the main output decides it.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);

  int64_t numOfRows = 0;
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pCell == NULL) {
      continue;
    }

    if (numOfRows < pCell->numOfRes) {
      numOfRows = pCell->numOfRes;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

271 272 273 274 275
/*
 * The number of results needs to be updated because the offset value was updated.
 * Every output function except ts/tag/tagprj/ts_dummy gets its numOfRes overwritten
 * with numOfRes; the previous value is asserted to be larger.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);

    int16_t fid = pRuntimeEnv->pCtx[idx].functionId;
    bool    skip = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ) ||
                   (fid == TSDB_FUNC_TS_DUMMY);
    if (!skip) {
      assert(pCell->numOfRes > numOfRes);
      pCell->numOfRes = numOfRes;
    }
  }
}

291
/* Maps a group index to an id: 50000000 + groupIndex * 10000. */
static UNUSED_FUNC int32_t getMergeResultGroupId(int32_t groupIndex) {
  const int32_t idBase = 50000000;
  const int32_t idStride = 10000;

  return idBase + (groupIndex * idStride);
}

/*
 * Tell whether the group-by clause contains a normal (non-tag) column.
 * Returns false for a NULL or empty group-by expression.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // make sure the normal column is located at the second position if tbname exists in the group by clause
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pCol->colIndex > 0);
    }

    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by clause.
 * The column is located by id in pQuery->colList; TSDB_DATA_TYPE_NULL is returned
 * when no column of the query matches.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // stays at -2 when the group-by clause has no normal column
  int32_t groupColId = -2;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  int16_t colType = TSDB_DATA_TYPE_NULL;
  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (groupColId != pQuery->colList[c].colId) {
      continue;
    }

    colType = pQuery->colList[c].type;
    break;
  }

  return colType;
}

/*
 * True when the output contains at least one tag_dummy/ts_dummy function together with
 * at least one function flagged TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCount = 0;
  bool    dummyTagSeen = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      dummyTagSeen = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0 && dummyTagSeen);
}

363 364
/* True when every output function is either TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ (also true for zero outputs). */
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    bool    projection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);
    if (!projection) {
      return false;
    }

    ++idx;
  }

  return true;
}

H
Haojun Liao 已提交
374
/* True when the first output expression is the TSDB_FUNC_TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
375

376 377 378
/*
 * Apply the LIMIT clause to the rows produced in the current round.
 * When the accumulated total plus the current rows exceeds a positive limit, the current
 * row count is cut so the total equals the limit and the query is marked QUERY_COMPLETED.
 *
 * @return true when rows were discarded because of the limit, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit <= 0) {
    return false;
  }

  if (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  // keep only as many rows as are still allowed
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);
  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any output function is TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    switch (pQuery->pExpr1[idx].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
        return true;
      default:
        break;  // ts and every other function: keep looking
    }
  }

  return false;
}

408 409 410 411 412 413 414 415 416 417 418
/* True when any output function is TSDB_FUNC_TWA. */
static bool timeWindowInterpoRequired(SQuery *pQuery) {
  bool required = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput && !required; ++idx) {
    required = (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_TWA);
  }

  return required;
}

H
Haojun Liao 已提交
419
/*
 * True when the query is a single ts_comp output, or when any output expression
 * refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirst = &pQuery->pExpr1[0];
  if (pQuery->numOfOutput == 1 && pFirst->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // set tag value, by which the results are aggregated.
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SExprInfo *pExpr = &pQuery->pExpr1[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      return true;
    }

    ++idx;
  }

  return false;
}

437 438 439 440 441 442 443 444
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
445
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
446
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
447 448
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
449 450
  } else {
    *pColStatis = NULL;
451
  }
452

H
Haojun Liao 已提交
453
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
454 455 456
    return false;
  }

457 458 459
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
460

461 462 463
  return true;
}

H
Haojun Liao 已提交
464
/*
 * Look up, or create, the result row identified by (pData, bytes, uid) and make it the
 * current row of pResultRowInfo.
 *
 * The key is built into pRuntimeEnv->keyBuf and looked up in pRuntimeEnv->pResultRowHashTable.
 *  - Interval query, not master scan: return the existing row or NULL; nothing is added.
 *  - Interval query, master scan: if the row exists in the hash table and in pResultRowInfo,
 *    only curIndex is updated; otherwise the row is appended to pResultRowInfo.
 *  - Non-interval (group-by column) query: an existing row is returned directly.
 *
 * The pointer array of pResultRowInfo grows on demand. On allocation failure or when the
 * number of rows exceeds MAX_INTERVAL_TIME_WINDOW the function does not return; it
 * longjmp()s to pRuntimeEnv->env with the matching error code.
 *
 * @return the prepared result row, or NULL (only for the non-master-scan interval case)
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  bool existed = false;
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);

  SResultRow **p1 =
      (SResultRow **)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));

  // in case of repeat scan/reverse scan, no new time window added.
  if (QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    if (!masterscan) {  // the *p1 may be NULL in case of sliding+offset exists.
      return (p1 != NULL)? *p1:NULL;
    }

    if (p1 != NULL) {
      // search backwards: the wanted window is most likely among the latest ones
      for(int32_t i = pResultRowInfo->size - 1; i >= 0; --i) {
        if (pResultRowInfo->pResult[i] == (*p1)) {
          pResultRowInfo->curIndex = i;
          existed = true;
          break;
        }
      }
    }
  } else {
    if (p1 != NULL) {  // group by column query
      return *p1;
    }
  }

  if (!existed) {
    // TODO refactor
    // more than the capacity, reallocate the resources
    if (pResultRowInfo->size >= pResultRowInfo->capacity) {
      int64_t newCapacity = 0;
      if (pResultRowInfo->capacity > 10000) {
        newCapacity = (int64_t)(pResultRowInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pResultRowInfo->capacity * 1.5);
      }

      // the factor truncates to "no growth" for capacity 0 or 1; always leave room for the
      // slot written below so that pResult[size] never lands outside the array
      if (newCapacity <= pResultRowInfo->size) {
        newCapacity = (int64_t)pResultRowInfo->size + 1;
      }

      char *t = realloc(pResultRowInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pResultRowInfo->pResult = (SResultRow **)t;

      // zero the newly added slots
      int32_t inc = (int32_t)newCapacity - pResultRowInfo->capacity;
      memset(&pResultRowInfo->pResult[pResultRowInfo->capacity], 0, POINTER_BYTES * inc);

      pResultRowInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = NULL;

    if (p1 == NULL) {
      pResult = getNewResultRow(pRuntimeEnv->pool);
      int32_t ret = initResultRow(pResult);
      if (ret != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // add a new result set for a new group
      taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pResult, POINTER_BYTES);
    } else {
      // known to the hash table but not yet listed in this result-row info
      pResult = *p1;
    }

    pResultRowInfo->pResult[pResultRowInfo->size] = pResult;
    pResultRowInfo->curIndex = pResultRowInfo->size++;
  }

  // too many time window in query
  if (pResultRowInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pResultRowInfo, pResultRowInfo->curIndex);
}

// get the correct time window according to the handled timestamp
H
Haojun Liao 已提交
545
static STimeWindow getActiveTimeWindow(SResultRowInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
546
  STimeWindow w = {0};
547

548
 if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
549
    w.skey = pWindowResInfo->prevSKey;
550
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
551
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
552
    } else {
553
      w.ekey = w.skey + pQuery->interval.interval - 1;
554
    }
555
  } else {
556
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
557
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
558
    w = pWindowRes->win;
559
  }
560

561
  if (w.skey > ts || w.ekey < ts) {
562 563 564
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
565 566
    } else {
      int64_t st = w.skey;
567

568
      if (st > ts) {
569
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
570
      }
571

572
      int64_t et = st + pQuery->interval.interval - 1;
573
      if (et < ts) {
574
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
575
      }
576

577
      w.skey = st;
578
      w.ekey = w.skey + pQuery->interval.interval - 1;
579
    }
580
  }
581

582 583 584 585 586 587 588
  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }
589

590 591 592
  return w;
}

H
Haojun Liao 已提交
593
/*
 * Assign a (page, row) slot in the disk-based result buffer to a result row that has none.
 *
 * The last page of the page list for tid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when the list is empty) a new page is requested.
 *
 * @return 0 on success or when the row already has a page, -1 when no page could be obtained
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pageId != -1) {
    return 0;  // already has a slot
  }

  // in the first scan, new space needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pageList = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(pageList) != 0) {
    SPageInfo* pLast = getLastPageInfo(pageList);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);
      pPage = getNewDataBuf(pResultBuf, tid, &pageId);
      if (pPage != NULL) {
        assert(pPage->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, tid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->rowId = (int32_t)(pPage->num++);

    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

637 638
/*
 * Prepare the result row of the time window *win (keyed by its start key and groupId)
 * and bind the function contexts' output buffers to it.
 *
 * @param pResult  out: the result row, or NULL when no row is available for this window
 * @return TSDB_CODE_SUCCESS (also when *pResult is set to NULL), or -1 when no result
 *         buffer page could be attached to the row (in that case *pResult is not written)
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, STimeWindow *win,
    bool masterscan, SResultRow** pResult, int64_t groupId) {
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  SResultRow *pRow = doPrepareResultRowFromKey(pRuntimeEnv, pResultRowInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, groupId);
  if (pRow == NULL) {
    *pResult = NULL;
    return TSDB_CODE_SUCCESS;
  }

  // not assign result buffer yet, add new result buffer
  if (pRow->pageId == -1) {
    int32_t code = addNewWindowResultBuf(pRow, pBuf, (int32_t) groupId, pRuntimeEnv->numOfRowsPerPage);
    if (code != TSDB_CODE_SUCCESS) {
      return -1;
    }
  }

  // set time window for current result
  pRow->win = (*win);
  *pResult = pRow;
  setResultRowOutputBufInitCtx(pRuntimeEnv, pRow);

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
664
/* Returns the closed flag of the result row stored at slot (slot must be within [0, size)). */
static bool getResultRowStatus(SResultRowInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SResultRow *pRow = pWindowResInfo->pResult[slot];
  return pRow->closed;
}

669 670 671 672 673 674 675 676 677 678 679 680 681 682
/* Selects which interpolation flag of a result row is addressed: startInterp or endInterp. */
typedef enum SResultTsInterpType {
  RESULT_ROW_START_INTERP = 1,
  RESULT_ROW_END_INTERP   = 2,
} SResultTsInterpType;

/* Marks the start or the end interpolation flag of pResult as set, depending on type. */
static void setResultRowInterpo(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  switch (type) {
    case RESULT_ROW_START_INTERP:
      pResult->startInterp = true;
      break;
    default:
      pResult->endInterp   = true;
      break;
  }
}

H
Haojun Liao 已提交
683
/* Reports whether the start or the end interpolation flag of pResult is set, depending on type. */
static bool resultRowInterpolated(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  switch (type) {
    case RESULT_ROW_START_INTERP:
      return pResult->startInterp == true;
    default:
      return pResult->endInterp   == true;
  }
}

H
Haojun Liao 已提交
692
/*
 * Count the rows, starting at pos and walking in scan direction, that belong to the
 * window ending at ekey.
 *
 * Ascending: searchFn runs over pData[pos .. numOfRows-1]; descending: over pData[0 .. pos].
 * The row found by searchFn is included when its timestamp equals ekey.
 * The result is asserted to be positive.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order != TSDB_ORDER_ASC) {
    int32_t hit = searchFn((char *)pData, pos + 1, ekey, order);
    if (hit >= 0) {
      steps = pos - hit;
      steps += (pData[hit] == ekey) ? 1 : 0;
    }
  } else {
    // the search works on the sub-array beginning at pos, so hit is relative to pos
    int32_t hit = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (hit >= 0) {
      steps = hit;
      steps += (pData[hit + pos] == ekey) ? 1 : 0;
    }
  }

  assert(steps > 0);
  return steps;
}

720
/*
 * Close the result rows that are complete with respect to lastKey and reposition curIndex.
 *
 * Rows are visited from the newest backwards until an already closed row is met.
 * Without interpolation a row is complete when (asc) win.ekey <= lastKey or
 * (desc) win.skey >= lastKey. With interpolation the row must additionally have its
 * endInterp flag set, and the ascending test uses win.skey instead of win.ekey.
 *
 * If no visited row stayed open, curIndex becomes the last row. Otherwise curIndex is the
 * row right after the newest closed one (or that row itself when it is the last row), and
 * prevSKey is set to that row's start key.
 */
static void doUpdateResultRowIndex(SResultRowInfo*pResultRowInfo, TSKEY lastKey, bool ascQuery, bool timeWindowInterpo) {
  int64_t openSKey = TSKEY_INITIAL_VAL;
  int32_t idx = 0;

  for (idx = pResultRowInfo->size - 1; idx >= 0; --idx) {
    SResultRow *pRow = pResultRowInfo->pResult[idx];
    if (pRow->closed) {
      break;
    }

    // new closed result rows
    bool complete;
    if (timeWindowInterpo) {
      complete = pRow->endInterp && (ascQuery ? (pRow->win.skey <= lastKey) : (pRow->win.skey >= lastKey));
    } else {
      complete = ascQuery ? (pRow->win.ekey <= lastKey) : (pRow->win.skey >= lastKey);
    }

    if (!complete) {
      openSKey = pRow->win.skey;
      continue;
    }

    if (timeWindowInterpo && idx > 0) {  // the first time window, the startInterp is false.
      assert(pRow->startInterp);
    }

    closeResultRow(pResultRowInfo, idx);
  }

  // all result rows are closed, set the last one to be the skey
  if (openSKey == TSKEY_INITIAL_VAL) {
    pResultRowInfo->curIndex = pResultRowInfo->size - 1;
    return;
  }

  // locate the newest closed row (idx ends at -1 when there is none)
  idx = pResultRowInfo->size - 1;
  while (idx >= 0 && !pResultRowInfo->pResult[idx]->closed) {
    --idx;
  }

  // current not closed result object
  pResultRowInfo->curIndex = (idx == pResultRowInfo->size - 1) ? idx : idx + 1;
  pResultRowInfo->prevSKey = pResultRowInfo->pResult[pResultRowInfo->curIndex]->win.skey;
}
770

771
/*
 * Update the open/closed state of the result rows after a table has been scanned up to lastKey.
 * When lastKey has moved beyond the table's window end (in scan direction) all rows are closed
 * and curIndex points to the last row; otherwise the rows are checked individually against
 * the last handled key (lastKey - step).
 */
static void updateResultRowIndex(SResultRowInfo* pResultRowInfo, STableQueryInfo* pTableQueryInfo, bool ascQuery, bool timeWindowInterpo) {
  bool scanFinished = ascQuery ? (pTableQueryInfo->lastKey > pTableQueryInfo->win.ekey)
                               : (pTableQueryInfo->lastKey < pTableQueryInfo->win.ekey);

  if (!scanFinished) {
    int32_t step = ascQuery? 1:-1;
    doUpdateResultRowIndex(pResultRowInfo, pTableQueryInfo->lastKey - step, ascQuery, timeWindowInterpo);
    return;
  }

  closeAllResultRows(pResultRowInfo);
  pResultRowInfo->curIndex = pResultRowInfo->size - 1;
}

/*
 * Count the rows of the data block, starting at startPos and walking in scan direction,
 * that fall into the window ending at ekey.
 *
 * If the window ends inside the block, the count comes from getForwardStepsInBlock;
 * otherwise all remaining rows of the block are taken. When updateLastKey is set,
 * pQuery->current->lastKey is moved one step past the last counted row (or past the
 * block border when the rest of the block is taken).
 *
 * @return number of rows, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);
  int32_t rows  = -1;

  STableQueryInfo* pCurrent = pQuery->current;

  if (!QUERY_IS_ASC_QUERY(pQuery)) {  // desc
    if (ekey <= pDataBlockInfo->window.skey) {
      rows = startPos + 1;
      if (updateLastKey) {
        pCurrent->lastKey = pDataBlockInfo->window.skey + step;
      }
    } else {
      rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        pCurrent->lastKey = pPrimaryColumn[startPos - (rows - 1)] + step;
      }
    }
  } else {
    if (ekey >= pDataBlockInfo->window.ekey) {
      rows = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        pCurrent->lastKey = pDataBlockInfo->window.ekey + step;
      }
    } else {
      rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        pCurrent->lastKey = pPrimaryColumn[startPos + (rows - 1)] + step;
      }
    }
  }

  assert(rows > 0);
  return rows;
}

821 822
/*
 * Run the block-wise implementation (xFunction) of every output function over a slice
 * of the current data block.
 *
 * pWin         time window being processed; its skey becomes nStartQueryTimestamp
 * offset       row index where the slice starts in scan order
 * forwardStep  number of rows in the slice
 * tsCol        timestamp column of the block (not used here)
 * numOfTotal   total number of rows in the block
 *
 * The pre-aggregated statistics flag of a context is switched off for the duration of
 * the call when the slice does not span the whole block, and restored afterwards.
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pWin, int32_t offset, int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    // remember this context's own flag; restoring from pCtx[0] would leak another column's state
    bool hasPreAgg = pCtx[k].preAggVals.isSet;

    pCtx[k].nStartQueryTimestamp = pWin->skey;
    pCtx[k].size = forwardStep;
    pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

    int32_t functionId = pQuery->pExpr1[k].base.functionId;

    // not a whole block involved in query processing, statistics data can not be used
    if (hasPreAgg && forwardStep < numOfTotal) {
      pCtx[k].preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
      aAggs[functionId].xFunction(&pCtx[k]);
    }

    // restore it
    pCtx[k].preAggVals.isSet = hasPreAgg;
  }
}

849 850
/*
 * Run the row-wise implementation (xFunctionF) of every output function on the single
 * row at `offset`, tagging each context with the start key of window pWin.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pWin, int32_t offset) {
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;
  SQuery         *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pCtxList[i];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t funcId = pQuery->pExpr1[i].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pFuncCtx, funcId)) {
      continue;
    }

    aAggs[funcId].xFunctionF(pFuncCtx, offset);
  }
}

H
Haojun Liao 已提交
863 864
/*
 * Advance pNext to the following time window (getNextTimeWindow) and locate the row of
 * the current block at which that window starts.
 *
 * prevPosition  row index of the last row of the previous window, or -1; for tumbling
 *               windows (sliding == interval) the new start is simply the adjacent row
 *
 * Returns the start row index, or -1 when the new window lies entirely beyond this block
 * in scan direction. If the row found at the start position is already past the window,
 * pNext is moved further (by whole sliding steps, or re-derived from that row's timestamp
 * for 'n'/'y' interval units).
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  getNextTimeWindow(pQuery, pNext);

  const bool   asc    = QUERY_IS_ASC_QUERY(pQuery);
  STimeWindow *pBlock = &pDataBlockInfo->window;

  // next time window is not in current block
  if (asc ? (pNext->skey > pBlock->ekey) : (pNext->ekey < pBlock->skey)) {
    return -1;
  }

  // leading edge of the window in scan direction, clamped against pQuery->window.skey
  TSKEY startKey;
  if (asc) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  int32_t startPos;
  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling time window query, a special case of sliding time window query
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (asc && startKey <= pBlock->skey) {
    startPos = 0;
  } else if (!asc && startKey >= pBlock->ekey) {
    startPos = pDataBlockInfo->rows - 1;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  // without a timestamp column only the block range can be checked
  if (primaryKeys == NULL) {
    if (asc) {
      assert(pBlock->skey <= pNext->ekey);
    } else {
      assert(pBlock->ekey >= pNext->skey);
    }

    return startPos;
  }

  /*
   * The window may not cover the row at startPos (this may happen when the time window
   * is too small); in that case move the window forward to that row.
   */
  TSKEY next = primaryKeys[startPos];
  bool  rowPastWindow = asc ? (next > pNext->ekey) : (next < pNext->skey);
  if (!rowPastWindow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    pNext->skey = taosTimeTruncate(next, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (asc) {
    pNext->ekey += ((next - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    pNext->skey -= ((pNext->skey - next + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
939
/*
 * Return the far edge of pWindow in scan direction (ekey for ascending scans, skey for
 * descending scans), clamped so it does not run past pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
956 957
// Find the column with id colId in pDataBlock and return its data buffer, or NULL when
// no column matches.
// todo: replace the linear scan with a binary search
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }

    idx += 1;
  }

  return NULL;
}

970
/*
 * Resolve the input data buffer for output expression `col`.
 *
 * For an arithmetic expression the support structure `sas` is filled in (expression,
 * row offset, column list, and one data pointer per query column); the buffer of the
 * last query column is returned. For any other function the buffer of the expression's
 * own column is returned, or NULL when that column is not a normal column.
 *
 * size is the number of rows in the block; it only affects sas->offset for descending scans.
 * Returns NULL when pDataBlock is NULL.
 */
static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size, SArray *pDataBlock) {
  if (pDataBlock == NULL) {
    return NULL;
  }

  SQuery *pQuery = pRuntimeEnv->pQuery;

  // plain (non-arithmetic) function: hand back the column the expression refers to
  if (pQuery->pExpr1[col].base.functionId != TSDB_FUNC_ARITHM) {
    SColIndex *pColIndex = &pQuery->pExpr1[col].base.colInfo;
    if (!TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      return NULL;
    }

    SColumnInfoData *pColData = taosArrayGet(pDataBlock, pColIndex->colIndex);
    assert(pColData->info.colId == pColIndex->colId);

    return pColData->pData;
  }

  sas->pArithExpr = &pQuery->pExpr1[col];
  sas->offset     = (QUERY_IS_ASC_QUERY(pQuery))? pQuery->pos : pQuery->pos - (size - 1);
  sas->colList    = pQuery->colList;
  sas->numOfCols  = pQuery->numOfCols;

  // here the pQuery->colList and sas->colList are identical
  char *colData = NULL;
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    colData = getDataBlockImpl(pDataBlock, pQuery->colList[i].colId);

    assert(colData != NULL);
    sas->data[i] = colData;  // start from the offset
  }

  return colData;
}

H
Haojun Liao 已提交
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
/*
 * Set the start key (type == RESULT_ROW_START_INTERP) or otherwise the end key of all
 * numOfOutput contexts to INT64_MIN.
 */
static void setNotInterpoWindowKey(SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t type) {
  const bool startSide = (type == RESULT_ROW_START_INTERP);

  for (int32_t i = 0; i < numOfOutput; ++i) {
    if (startSide) {
      pCtx[i].start.key = INT64_MIN;
    } else {
      pCtx[i].end.key = INT64_MIN;
    }
  }
}

1031
// window start key interpolation
H
Haojun Liao 已提交
1032
static bool setTimeWindowInterpolationStartTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t pos, int32_t numOfRows, SArray* pDataBlock, TSKEY* tsCols, STimeWindow* win) {
1033 1034
  SQuery* pQuery = pRuntimeEnv->pQuery;

H
Haojun Liao 已提交
1035
  TSKEY curTs  = tsCols[pos];
1036 1037
  TSKEY lastTs = *(TSKEY *) pRuntimeEnv->prevRow[0];

H
Haojun Liao 已提交
1038 1039 1040 1041
  // lastTs == INT64_MIN and pos == 0 means this is the first time window, interpolation is not needed.
  // start exactly from this point, no need to do interpolation
  TSKEY key = QUERY_IS_ASC_QUERY(pQuery)? win->skey:win->ekey;
  if (key == curTs) {
H
Haojun Liao 已提交
1042
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
1043
    return true;
H
Haojun Liao 已提交
1044
  }
1045

H
Haojun Liao 已提交
1046
  if (lastTs == INT64_MIN && ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))) {
H
Haojun Liao 已提交
1047
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
H
Haojun Liao 已提交
1048
    return true;
1049 1050
  }

H
Haojun Liao 已提交
1051 1052 1053
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  TSKEY   prevTs = ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))?
      lastTs:tsCols[pos - step];
1054

H
Haojun Liao 已提交
1055 1056 1057
  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, pos - step, curTs, pos, key, RESULT_ROW_START_INTERP);
  return true;
}
1058

H
Haojun Liao 已提交
1059 1060 1061
/*
 * Window end key interpolation.
 *
 * endRowIndex  row index of the last row of the window inside the current block
 * blockEkey    far edge of the block in scan direction
 *
 * Returns false when the window continues past the current block (nothing can be decided
 * yet), true otherwise. The end value is interpolated between the row at endRowIndex and
 * the next row in scan order unless the last row sits exactly on the window end key.
 */
static bool setTimeWindowInterpolationEndTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t endRowIndex, SArray* pDataBlock, TSKEY* tsCols, TSKEY blockEkey, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  TSKEY   actualEndKey = tsCols[endRowIndex];

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);
  TSKEY      key = asc ? win->ekey : win->skey;

  // not ended in current data block, do not invoke interpolation
  bool continuesPastBlock = asc ? (key > blockEkey) : (key < blockEkey);

  // either undecidable yet, or there is an actual end point of the window: no interpolation
  if (continuesPastBlock || key == actualEndKey) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
    return !continuesPastBlock;
  }

  int32_t nextRowIndex = endRowIndex + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  assert(nextRowIndex >= 0);

  TSKEY nextKey = tsCols[nextRowIndex];
  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, actualEndKey, endRowIndex, nextKey, nextRowIndex, key, RESULT_ROW_END_INTERP);
  return true;
}

1086 1087
/*
 * Copy row rowIndex of every query column of pDataBlock into pRuntimeEnv->prevRow.
 * Does nothing when pDataBlock is NULL. pDataBlockInfo is not used.
 */
static void saveDataBlockLastRow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray* pDataBlock,
    int32_t rowIndex) {
  if (pDataBlock != NULL) {
    SQuery* pQuery = pRuntimeEnv->pQuery;

    for (int32_t col = 0; col < pQuery->numOfCols; ++col) {
      SColumnInfoData *pCol = taosArrayGet(pDataBlock, col);
      char *src = ((char*)pCol->pData) + (pCol->info.bytes * rowIndex);

      memcpy(pRuntimeEnv->prevRow[col], src, pCol->info.bytes);
    }
  }
}

/*
 * Timestamp at which processing of the current block starts: the timestamp of the row
 * at the query's current position when the timestamp column is available, otherwise the
 * near edge of the block range (skey for ascending scans, ekey for descending scans).
 */
static TSKEY getStartTsKey(SQuery* pQuery, SDataBlockInfo* pDataBlockInfo, TSKEY* tsCols, int32_t step) {
  if (tsCols != NULL) {
    int32_t rowPos = GET_COL_DATA_POS(pQuery, 0, step);
    return tsCols[rowPos];
  }

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return pDataBlockInfo->window.skey;
  }

  return pDataBlockInfo->window.ekey;
}

H
Haojun Liao 已提交
1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150
/*
 * Interpolate the start and end border values of window `win` for the rows
 * [startPos, startPos + (forwardStep - 1) * step] of the current block, and flag the
 * result row accordingly. A border that is already flagged as interpolated only gets
 * its context key reset. No-op unless pRuntimeEnv->timeWindowInterpo is set.
 */
static void doWindowBorderInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray *pDataBlock,
    SResultRow* pResult, STimeWindow* win, int32_t startPos, int32_t forwardStep) {
  if (!pRuntimeEnv->timeWindowInterpo) {
    return;
  }

  assert(pDataBlock != NULL);

  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SColumnInfoData *pTsColumn = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = (TSKEY *)(pTsColumn->pData);

  // start border
  if (resultRowInterpolated(pResult, RESULT_ROW_START_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
  } else if (setTimeWindowInterpolationStartTs(pRuntimeEnv, startPos, pDataBlockInfo->rows, pDataBlock, tsCols, win)) {
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  }

  // end border
  if (resultRowInterpolated(pResult, RESULT_ROW_END_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
  } else {
    int32_t endRowIndex = startPos + (forwardStep - 1) * step;
    TSKEY   blockEdge = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;

    if (setTimeWindowInterpolationEndTs(pRuntimeEnv, endRowIndex, pDataBlock, tsCols, blockEdge, win)) {
      setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
    }
  }
}

1151
/**
H
Haojun Liao 已提交
1152
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
1153 1154
 * @param pRuntimeEnv
 * @param forwardStep
1155
 * @param tsCols
1156 1157 1158 1159 1160
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
H
Haojun Liao 已提交
1161
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
H
Haojun Liao 已提交
1162
                                    SResultRowInfo *pWindowResInfo, __block_search_fn_t searchFn, SArray *pDataBlock) {
1163
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1164
  bool            masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1165

1166 1167 1168
  SQuery *pQuery  = pRuntimeEnv->pQuery;
  int64_t groupId = pQuery->current->groupIndex;

1169
  TSKEY  *tsCols = NULL;
1170
  if (pDataBlock != NULL) {
1171
    SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, 0);
1172
    tsCols = (TSKEY *)(pColInfo->pData);
1173
  }
1174

1175
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1176
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1177 1178
    char *dataBlock = getDataBlock(pRuntimeEnv, &pRuntimeEnv->sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &pRuntimeEnv->sasArray[k], k, pQInfo->vgId);
1179
  }
1180

1181
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1182
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1183 1184
    int32_t prevIndex = curTimeWindowIndex(pWindowResInfo);

1185
    TSKEY ts = getStartTsKey(pQuery, pDataBlockInfo, tsCols, step);
H
Haojun Liao 已提交
1186
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1187

1188
    SResultRow* pResult = NULL;
1189 1190 1191
    int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
    if (ret != TSDB_CODE_SUCCESS || pResult == NULL) {
      goto _end;
1192
    }
1193

H
Haojun Liao 已提交
1194 1195 1196
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1197 1198
    TSKEY ekey = reviseWindowEkey(pQuery, &win);
    forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
H
Haojun Liao 已提交
1199

1200 1201 1202
    // prev time window not interpolation yet.
    int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
    if (prevIndex != -1 && prevIndex < curIndex && pRuntimeEnv->timeWindowInterpo) {
1203
      for(int32_t j = prevIndex; j < curIndex; ++j) { // previous time window may be all closed already.
1204
        SResultRow *pRes = pWindowResInfo->pResult[j];
1205 1206 1207 1208
        if (pRes->closed) {
          assert(resultRowInterpolated(pRes, RESULT_ROW_START_INTERP) && resultRowInterpolated(pRes, RESULT_ROW_END_INTERP));
          continue;
        }
H
Haojun Liao 已提交
1209

1210 1211 1212
        STimeWindow w = pRes->win;
        ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &w, masterScan, &pResult, groupId);
        assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
H
Haojun Liao 已提交
1213

1214 1215 1216 1217
        int32_t p = QUERY_IS_ASC_QUERY(pQuery)? 0:pDataBlockInfo->rows-1;
        doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, *(TSKEY*) pRuntimeEnv->prevRow[0], -1,  tsCols[0], p, w.ekey, RESULT_ROW_END_INTERP);
        setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
        setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
H
Haojun Liao 已提交
1218

1219
        doBlockwiseApplyFunctions(pRuntimeEnv, &w, startPos, 0, tsCols, pDataBlockInfo->rows);
H
Haojun Liao 已提交
1220 1221
      }

1222 1223 1224
      // restore current time window
      ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
      assert (ret == TSDB_CODE_SUCCESS);
1225
    }
1226

1227 1228 1229
    // window start key interpolation
    doWindowBorderInterpolation(pRuntimeEnv, pDataBlockInfo, pDataBlock, pResult, &win, pQuery->pos, forwardStep);
    doBlockwiseApplyFunctions(pRuntimeEnv, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1230

1231
    STimeWindow nextWin = win;
1232
    while (1) {
H
Haojun Liao 已提交
1233 1234
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1235 1236 1237
      if (startPos < 0) {
        break;
      }
1238

1239
      // null data, failed to allocate more memory buffer
1240 1241
      int32_t code = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &nextWin, masterScan, &pResult, groupId);
      if (code != TSDB_CODE_SUCCESS || pResult == NULL) {
1242 1243
        break;
      }
1244

1245
      ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1246
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1247

1248
      // window start(end) key interpolation
H
Haojun Liao 已提交
1249
      doWindowBorderInterpolation(pRuntimeEnv, pDataBlockInfo, pDataBlock, pResult, &nextWin, startPos, forwardStep);
1250
      doBlockwiseApplyFunctions(pRuntimeEnv, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1251
    }
1252

1253 1254 1255 1256 1257 1258
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1259
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1260
      int32_t functionId = pQuery->pExpr1[k].base.functionId;
1261
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
1262
        pCtx[k].nStartQueryTimestamp = pDataBlockInfo->window.skey;
1263 1264 1265 1266
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1267

1268
  _end:
1269
  if (pRuntimeEnv->timeWindowInterpo) {
1270 1271
    int32_t rowIndex = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->rows-1:0;
    saveDataBlockLastRow(pRuntimeEnv, pDataBlockInfo, pDataBlock, rowIndex);
1272
  }
1273 1274
}

1275
/*
 * Select (creating it if necessary) the result row of the group identified by the
 * group-by column value pData and make it the current output buffer.
 *
 * pData       group-by column value of the current row
 * type        data type of the column; float/double are rejected
 * bytes       width of the column, used as key length for fixed-length types
 * groupIndex  group the result row belongs to
 *
 * Returns TSDB_CODE_SUCCESS, or -1 on an unsupported type, on allocation failure of the
 * group key copy, or when no result buffer page can be added.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  const bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  // not assign result buffer yet, add new result buffer, TODO remove it
  char* d = pData;
  int16_t len = bytes;
  if (isVarType) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);
    return -1;
  }

  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, groupIndex);
  assert (pResultRow != NULL);

  if (isVarType) {
    if (pResultRow->key == NULL) {
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {  // out of memory: do not copy into a NULL buffer
        SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
        qError("QInfo:%p failed to allocate memory for group key, abort", pQInfo);
        return -1;
      }

      varDataCopy(pResultRow->key, pData);
    } else {
      assert(memcmp(pResultRow->key, pData, varDataTLen(pData)) == 0);
    }
  } else {
    // the numeric value is only meaningful (and only needed) for fixed-length types
    int64_t v = -1;
    GET_TYPED_DATA(v, int64_t, type, pData);

    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1319
/*
 * Locate the data buffer of the (non-tag) group-by column inside pDataBlock.
 *
 * *type and *bytes receive the type and width of the column as listed in
 * pQuery->colList. Returns the column's data buffer, or NULL when no non-tag group-by
 * column is present in the data block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupby = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupby->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupby->columnInfo, g);
    if (!TSDB_COL_IS_TAG(pGroupCol->flag)) {
      int32_t colId = pGroupCol->colId;

      // position of the column in the query's column list
      int16_t colIndex = -1;
      for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
        if (pQuery->colList[c].colId != colId) {
          continue;
        }

        colIndex = c;
        break;
      }

      assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

      *type = pQuery->colList[colIndex].type;
      *bytes = pQuery->colList[colIndex].bytes;

      /*
       *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
       * stage, the remain tables may not have the required column in cache actually. So, the validation of required
       * column in cache with the corresponding schema is reinforced.
       */
      int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

      int32_t b = 0;
      while (b < numOfBlockCols) {
        SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
        if (pGroupCol->colId == pColData->info.colId) {
          return pColData->pData;
        }

        b += 1;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the first context's input buffer with the current
 * element of the timestamp buffer (pRuntimeEnv->pTsBuf).
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tags differ, TS_JOIN_TS_NOT_EQUALS when the
 * row's timestamp has not reached the buffer element yet in scan direction, and
 * TS_JOIN_TS_EQUAL otherwise. A row timestamp beyond the buffer element is not expected
 * and trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTsBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is a pointer (it is passed to tVariantCompare as-is above), so use ->
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%" PRId64 ", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, (int64_t)elem.tag->i64, pQuery->order.order, pRuntimeEnv->pTsBuf->tsOrder,
         pRuntimeEnv->pTsBuf->cur.order, pRuntimeEnv->pTsBuf->cur.tsIndex);
#endif

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function with id functionId has to be invoked for the current
 * scan, given the state of its context.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pCellInfo = GET_RES_INFO(pCtx);
  SQuery             *pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // result already complete: nothing more to do
  if (pCellInfo->complete) {
    return false;
  }

  // dummy functions are never invoked
  if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  // first/first_dst run only while scanning in ascending order
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // last/last_dst: param[0] denotes the order type in which the function runs
  if (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST) {
    return pCtx->param[0].i64 == pQuery->order.order;
  }

  // in the supplementary (reverse) scan, only the functions handled above are executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

H
Haojun Liao 已提交
1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442
/*
 * For every TWA output function, linearly interpolate the column value at windowKey
 * between the row (prevTs, prevRowIndex) and the row (curTs, curRowIndex), and store the
 * resulting point as the start or end border of the function context, depending on type.
 *
 * prevRowIndex == -1 means the previous value is taken from pRuntimeEnv->prevRow instead
 * of the data block. Contexts of other functions only get their start key reset to
 * INT64_MIN.
 */
void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey,  int32_t type) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

    if (pQuery->pExpr1[k].base.functionId != TSDB_FUNC_TWA) {
      pFuncCtx->start.key = INT64_MIN;
      continue;
    }

    SColIndex*       pColIndex = &pQuery->pExpr1[k].base.colInfo;
    int16_t          index = pColIndex->colIndex;
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, index);

    assert(pColInfo->info.colId == pColIndex->colId && curTs != windowKey);

    // locate the two source values
    char *prevVal = (prevRowIndex == -1)
                        ? (char *)pRuntimeEnv->prevRow[index]
                        : (char *)pColInfo->pData + prevRowIndex * pColInfo->info.bytes;
    char *curVal = (char *)pColInfo->pData + curRowIndex * pColInfo->info.bytes;

    double v1 = 0, v2 = 0, v = 0;
    GET_TYPED_DATA(v1, double, pColInfo->info.type, prevVal);
    GET_TYPED_DATA(v2, double, pColInfo->info.type, curVal);

    SPoint point1 = (SPoint){.key = prevTs, .val = &v1};
    SPoint point2 = (SPoint){.key = curTs, .val = &v2};
    SPoint point  = (SPoint){.key = windowKey, .val = &v};
    taosGetLinearInterpolationVal(TSDB_DATA_TYPE_DOUBLE, &point1, &point2, &point);

    if (type == RESULT_ROW_START_INTERP) {
      pFuncCtx->start.key = point.key;
      pFuncCtx->start.val = v;
    } else {
      pFuncCtx->end.key = point.key;
      pFuncCtx->end.val = v;
    }
  }
}

H
Haojun Liao 已提交
1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501
/*
 * Row-wise counterpart of the window start border handling: settle the start border of
 * window `win` for the row (ts, offset), using (prevTs, prevRowIndex) as the preceding
 * row. If the border was settled before, only the start key of the contexts is reset.
 */
static void setTimeWindowSKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (resultRowInterpolated(pResult, RESULT_ROW_START_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    return;
  }

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);
  TSKEY      key = asc ? win->skey : win->ekey;

  if (key == ts) {
    // the row sits exactly on the border, nothing to interpolate
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  } else {
    // a usable previous row must exist and lie before the border in scan direction
    bool prevBeforeKey = asc ? (prevTs < key) : (prevTs > key);

    if (prevTs != INT64_MIN && prevBeforeKey) {
      doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, key, RESULT_ROW_START_INTERP);
      setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
    } else {
      setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    }
  }

  setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    pRuntimeEnv->pCtx[i].size = 1;
  }
}

/*
 * Row-wise counterpart of the window end border handling: interpolate the end border of
 * window `win` between (prevTs, prevRowIndex) and (ts, offset), flag the result row, and
 * set the input size of all contexts to 0.
 */
static void setTimeWindowEKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // the end border in scan direction: ekey when ascending, skey when descending
  TSKEY borderKey = win->skey;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    borderKey = win->ekey;
  }

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, borderKey, RESULT_ROW_END_INTERP);
  setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);

  setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);

  int32_t k = 0;
  while (k < pQuery->numOfOutput) {
    pRuntimeEnv->pCtx[k].size = 0;
    k += 1;
  }
}

1502
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
1503
                                  SResultRowInfo *pWindowResInfo, SArray *pDataBlock) {
1504
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1505
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1506

1507
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1508
  STableQueryInfo* item = pQuery->current;
H
Haojun Liao 已提交
1509

1510 1511
  int64_t groupId = item->groupIndex;

H
Haojun Liao 已提交
1512 1513 1514
  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
H
Haojun Liao 已提交
1515
  bool    groupbyColumnValue = pRuntimeEnv->groupbyColumn;
H
Haojun Liao 已提交
1516

1517 1518
  int16_t type = 0;
  int16_t bytes = 0;
1519

1520
  char *groupbyColumnData = NULL;
H
Haojun Liao 已提交
1521
  if (groupbyColumnValue) {
1522
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
1523
  }
1524

H
Haojun Liao 已提交
1525
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1526
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1527 1528
    char *dataBlock = getDataBlock(pRuntimeEnv, &pRuntimeEnv->sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &pRuntimeEnv->sasArray[k], k, pQInfo->vgId);
H
Haojun Liao 已提交
1529
    pCtx[k].size = 1;
1530
  }
1531

1532 1533
  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
1534
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
H
hjxilinx 已提交
1535 1536
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
1537
  }
1538

1539
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1540

1541 1542
  // from top to bottom in desc
  // from bottom to top in asc order
H
Haojun Liao 已提交
1543
  if (pRuntimeEnv->pTsBuf != NULL) {
1544
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
H
Haojun Liao 已提交
1545
           pQuery->order.order, pRuntimeEnv->pTsBuf->cur.order);
1546
  }
1547

H
hjxilinx 已提交
1548
  int32_t offset = -1;
H
Haojun Liao 已提交
1549
  TSKEY   prevTs = *(TSKEY*) pRuntimeEnv->prevRow[0];
H
Haojun Liao 已提交
1550
  int32_t prevRowIndex = -1;
1551

1552
  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
H
hjxilinx 已提交
1553
    offset = GET_COL_DATA_POS(pQuery, j, step);
1554

H
Haojun Liao 已提交
1555
    if (pRuntimeEnv->pTsBuf != NULL) {
1556 1557
      int32_t ret = doTSJoinFilter(pRuntimeEnv, offset);
      if (ret == TS_JOIN_TAG_NOT_EQUALS) {
1558
        break;
1559
      } else if (ret == TS_JOIN_TS_NOT_EQUALS) {
1560 1561
        continue;
      } else {
1562
        assert(ret == TS_JOIN_TS_EQUAL);
1563 1564
      }
    }
1565

1566
    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
1567 1568
      continue;
    }
1569

1570
    // interval window query, decide the time window according to the primary timestamp
H
Haojun Liao 已提交
1571
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1572
      int32_t prevWindowIndex = curTimeWindowIndex(pWindowResInfo);
1573
      int64_t ts = tsCols[offset];
H
Haojun Liao 已提交
1574

1575
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1576

1577
      SResultRow* pResult = NULL;
1578 1579 1580
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
      if (ret != TSDB_CODE_SUCCESS || pResult == NULL) {  // null data, too many state code
        goto _end;
1581
      }
H
Haojun Liao 已提交
1582

1583 1584
      // window start key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1585 1586 1587 1588 1589
        // check for the time window end time interpolation
        int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
        if (prevWindowIndex != -1 && prevWindowIndex < curIndex) {
          for (int32_t k = prevWindowIndex; k < curIndex; ++k) {
            SResultRow *pRes = pWindowResInfo->pResult[k];
1590 1591 1592 1593
            if (pRes->closed) {
              assert(resultRowInterpolated(pResult, RESULT_ROW_START_INTERP) && resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
              continue;
            }
H
Haojun Liao 已提交
1594

1595
            ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &pRes->win, masterScan, &pResult, groupId);
H
Haojun Liao 已提交
1596
            assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
H
Haojun Liao 已提交
1597

H
Haojun Liao 已提交
1598
            setTimeWindowEKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &pRes->win);
1599
            doRowwiseApplyFunctions(pRuntimeEnv, &pRes->win, offset);
H
Haojun Liao 已提交
1600 1601 1602
          }

          // restore current time window
1603
          ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
H
Haojun Liao 已提交
1604 1605
          if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
            continue;
1606 1607
          }
        }
1608

H
Haojun Liao 已提交
1609
        setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &win);
1610
      }
H
Haojun Liao 已提交
1611

1612
      doRowwiseApplyFunctions(pRuntimeEnv, &win, offset);
1613
      int32_t index = pWindowResInfo->curIndex;
1614

1615 1616
      STimeWindow nextWin = win;
      while (1) {
H
Haojun Liao 已提交
1617
        getNextTimeWindow(pQuery, &nextWin);
1618
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
H
Haojun Liao 已提交
1619
            (nextWin.ekey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
1620 1621
          break;
        }
1622

1623 1624 1625
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }
1626

1627
        // null data, failed to allocate more memory buffer
1628 1629
        int32_t code = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &nextWin, masterScan, &pResult, groupId);
        if (code != TSDB_CODE_SUCCESS || pResult == NULL) {
1630 1631
          break;
        }
1632

1633 1634
        setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &nextWin);
        doRowwiseApplyFunctions(pRuntimeEnv, &nextWin, offset);
1635
      }
1636

1637 1638
      // restore the index, add the result row will move the index
      pWindowResInfo->curIndex = index;
1639 1640
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
H
Haojun Liao 已提交
1641
      if (groupbyColumnValue) {
H
hjxilinx 已提交
1642
        char *val = groupbyColumnData + bytes * offset;
1643 1644 1645
        if (isNull(val, type)) {  // ignore the null value
          continue;
        }
1646

1647
        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, item->groupIndex);
1648
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
1649
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
1650 1651
        }
      }
1652

1653
      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1654
        int32_t functionId = pQuery->pExpr1[k].base.functionId;
1655 1656 1657 1658 1659
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }
1660

H
Haojun Liao 已提交
1661 1662
    prevTs = tsCols[offset];
    prevRowIndex = offset;
1663

H
Haojun Liao 已提交
1664
    if (pRuntimeEnv->pTsBuf != NULL) {
1665
      // if timestamp filter list is empty, quit current query
H
Haojun Liao 已提交
1666
      if (!tsBufNextPos(pRuntimeEnv->pTsBuf)) {
H
hjxilinx 已提交
1667
        setQueryStatus(pQuery, QUERY_COMPLETED);
1668 1669 1670 1671
        break;
      }
    }
  }
H
Haojun Liao 已提交
1672

1673
  _end:
1674
  assert(offset >= 0 && tsCols != NULL);
D
fix bug  
dapan1121 已提交
1675
  if (prevTs != INT64_MIN && prevTs != *(int64_t*)pRuntimeEnv->prevRow[0]) {
1676
    assert(prevRowIndex >= 0);
1677 1678 1679 1680 1681 1682
    item->lastKey = prevTs + step;
  }

  // In case of all rows in current block are not qualified
  if (pRuntimeEnv->timeWindowInterpo && prevRowIndex != -1) {
    saveDataBlockLastRow(pRuntimeEnv, pDataBlockInfo, pDataBlock, prevRowIndex);
H
Haojun Liao 已提交
1683 1684
  }

H
Haojun Liao 已提交
1685 1686
  if (pRuntimeEnv->pTsBuf != NULL) {
    item->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
1687
  }
1688 1689 1690
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1691
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1692
  SQuery *pQuery = pRuntimeEnv->pQuery;
1693

1694 1695
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  SResultRowInfo*  pResultRowInfo = &pRuntimeEnv->windowResInfo;
1696

H
Haojun Liao 已提交
1697
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->groupbyColumn) {
1698
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, pDataBlock);
1699
  } else {
1700
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, searchFn, pDataBlock);
1701
  }
1702

1703 1704 1705 1706
  // update the lastkey of current table for projection/aggregation query
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
  pTableQueryInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

1707
  // interval query with limit applied
1708
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1709
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyColumn) {
1710
    numOfRes = pResultRowInfo->size;
1711
    updateResultRowIndex(pResultRowInfo, pTableQueryInfo, QUERY_IS_ASC_QUERY(pQuery), pRuntimeEnv->timeWindowInterpo);
H
Haojun Liao 已提交
1712
  } else { // projection query
1713
    numOfRes = (int32_t) getNumOfResult(pRuntimeEnv);
1714

1715
    // update the number of output result
H
Haojun Liao 已提交
1716
    if (numOfRes > 0 && pQuery->checkResultBuf == 1) {
1717 1718
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1719

1720 1721 1722
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1723

1724 1725 1726
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1727

1728 1729
      if (((pTableQueryInfo->lastKey > pTableQueryInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQueryInfo->lastKey < pTableQueryInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
1730 1731
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1732
    }
1733
  }
1734

1735
  return numOfRes;
1736 1737
}

H
Haojun Liao 已提交
1738
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1739
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1740

H
Haojun Liao 已提交
1741 1742
  int32_t functionId = pQuery->pExpr1[colIndex].base.functionId;
  int32_t colId = pQuery->pExpr1[colIndex].base.colInfo.colId;
1743

1744
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1745
  pCtx->hasNull = hasNullValue(&pQuery->pExpr1[colIndex].base.colInfo, pStatis, &tpField);
1746
  pCtx->aInputElemBuf = inputData;
1747

1748
  if (tpField != NULL) {
H
Haojun Liao 已提交
1749
    pCtx->preAggVals.isSet  = true;
1750 1751
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1752 1753 1754
  } else {
    pCtx->preAggVals.isSet = false;
  }
1755

H
Haojun Liao 已提交
1756 1757
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1758 1759
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1760

H
Haojun Liao 已提交
1761
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1762 1763
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1764

1765 1766
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
1767
    pCtx->ptsList = tsCol;
1768
  }
1769

1770 1771 1772 1773 1774
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1775
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1776
    /*
H
Haojun Liao 已提交
1777
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
H
Haojun Liao 已提交
1778
     * timestamp column, and the y-value is the column specified in pQuery->pExpr1[i].colIdxInBuffer
1779 1780 1781 1782 1783
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
1784
       pCtx->param[1].i64 = pQuery->window.skey;
1785
       pCtx->param[1].nType = TSDB_DATA_TYPE_BIGINT;
1786
       pCtx->param[2].i64 = pQuery->window.ekey;
1787
       pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
1788
    }
1789

1790 1791
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1792 1793 1794 1795 1796 1797
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1798
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1799 1800 1801
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1802
    pInterpInfo->type = (int8_t)pQuery->fillType;
1803 1804
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1805

1806 1807 1808 1809
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1810 1811 1812
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1813 1814
      }
    }
H
Haojun Liao 已提交
1815
  } else if (functionId == TSDB_FUNC_TS_COMP) {
1816
    pCtx->param[0].i64 = vgId;
H
Haojun Liao 已提交
1817
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1818
  }
1819

1820 1821 1822 1823 1824 1825
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1826
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1827 1828 1829
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1830
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1831 1832 1833 1834 1835 1836
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1837
/*
 * For a "selectivity function + tags" query, attach the contexts of all tag_dummy/ts_dummy
 * expressions to the context of the selectivity function.
 *
 * Returns TSDB_CODE_QRY_OUT_OF_MEMORY if the context list cannot be allocated, otherwise
 * TSDB_CODE_SUCCESS. The list is released again when no selectivity function is found.
 */
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!isSelectivityWithTagsQuery(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  SQLFunctionCtx **tagCtxList = calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (tagCtxList == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SQLFunctionCtx *selectivityCtx = NULL;
  int32_t         numOfTagCtx = 0;
  int16_t         totalTagBytes = 0;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFuncMsg = &pQuery->pExpr1[k].base;

    bool isDummy = (pFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY) || (pFuncMsg->functionId == TSDB_FUNC_TS_DUMMY);
    if (isDummy) {
      totalTagBytes += pCtx[k].outputBytes;
      tagCtxList[numOfTagCtx++] = &pCtx[k];
    } else if ((aAggs[pFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCtx = &pCtx[k];
    }

    // everything else is left alone:
    //  - ts/tag: tag function may be the group by tag column, ts may be the required primary timestamp column
    //  - the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
  }

  if (selectivityCtx == NULL) {
    tfree(tagCtxList);
    return TSDB_CODE_SUCCESS;
  }

  selectivityCtx->tagInfo.pTagCtxList = tagCtxList;
  selectivityCtx->tagInfo.numOfTagCols = numOfTagCtx;
  selectivityCtx->tagInfo.tagsLen = totalTagBytes;

  return TSDB_CODE_SUCCESS;
}

1878
/*
 * Allocate and initialize the per-expression runtime structures of a query: the function contexts,
 * the output offsets, the result cell offsets and the arithmetic support array.
 *
 * @param pRuntimeEnv runtime environment; pQuery and prevRow must already be set
 * @param order       scan order stored in param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if any step fails. On failure
 *         everything allocated by this function is released again.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->offset = calloc(pQuery->numOfOutput, sizeof(int16_t));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));
  pRuntimeEnv->sasArray = calloc(pQuery->numOfOutput, sizeof(SArithmeticSupport));

  if (pRuntimeEnv->offset == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL || pRuntimeEnv->sasArray == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // move the "null required" marker from the column flag into the context
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type/size from the tag list, the user-defined column or the column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes  = pQuery->pExpr1[i].bytes;
    pCtx->outputType   = pQuery->pExpr1[i].type;

    pCtx->order        = pQuery->order.order;
    pCtx->functionId   = pSqlFuncMsg->functionId;
    pCtx->stableQuery  = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pExpr1[i].interBytes;
    pCtx->start.key    = INT64_MIN;
    pCtx->end.key      = INT64_MIN;

    pCtx->numOfParams  = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pExpr1[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64 = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64 = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64 = pQuery->order.orderColId;
    }

    if (functionId == TSDB_FUNC_ARITHM) {
      pRuntimeEnv->sasArray[i].data = calloc(pQuery->numOfCols, POINTER_BYTES);
      if (pRuntimeEnv->sasArray[i].data == NULL) {
        goto _clean;
      }
    }

    // every expression starts right behind the output/result cell of its predecessor
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pExpr1[i - 1].interBytes;
    }
  }

  *(int64_t*) pRuntimeEnv->prevRow[0] = INT64_MIN;

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyColumn && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the per-expression resources created before the failure. They are only reachable
  // through the arrays freed below, so they would leak otherwise. Both arrays come from calloc,
  // hence entries that were never initialized hold NULL data / zero parameters.
  if (pRuntimeEnv->sasArray != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      tfree(pRuntimeEnv->sasArray[i].data);
    }
  }

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  tfree(pRuntimeEnv->pCtx);
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->rowCellInfoOffset);
  tfree(pRuntimeEnv->sasArray);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
/*
 * Clean up the primary and the secondary query handle of the query and reset both pointers.
 * Afterwards the memory reference of the query is expected to be fully released.
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);

  pEnv->pSecQueryHandle = NULL;
  pEnv->pQueryHandle = NULL;

  // no reference to in-memory data may be left once both handles are gone
  SMemRef* pRef = &pQInfo->memRef;
  assert(pRef->ref == 0 && pRef->imem == NULL && pRef->mem == NULL);
}

D
fix bug  
dapan1121 已提交
2013

2014 2015 2016 2017
/*
 * Release everything owned by the runtime environment: result row info, the ts_comp output file,
 * function contexts with their parameters, arithmetic support buffers, fill info, result buffer,
 * query handles, ts buffer, offset/key/prev-row buffers, the result row hash table and the result
 * row pool. Does nothing if the environment has no query attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);

  qDebug("QInfo:%p teardown runtime env", pQInfo);
  cleanupResultRowInfo(&pRuntimeEnv->windowResInfo);

  // a ts_comp query keeps an open FILE pointer in its first output buffer
  if (isTSCompQuery(pQuery)) {
    FILE **ppFile = (FILE **)pQuery->sdata[0]->data;
    if (*ppFile != NULL) {
      fclose(*ppFile);
      *ppFile = NULL;
    }
  }

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

      for (int32_t p = 0; p < pFuncCtx->numOfParams; ++p) {
        tVariantDestroy(&pFuncCtx->param[p]);
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->pCtx);
  }

  if (pRuntimeEnv->sasArray != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      tfree(pRuntimeEnv->sasArray[k].data);
    }

    tfree(pRuntimeEnv->sasArray);
  }

  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);

  pRuntimeEnv->pTsBuf = tsBufDestroy(pRuntimeEnv->pTsBuf);

  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);
  tfree(pRuntimeEnv->prevRow);

  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

2076 2077 2078 2079
/* Returns true if a response context is attached to the query. */
static bool needBuildResAfterQueryComplete(SQInfo* pQInfo) {
  if (pQInfo->rspContext == NULL) {
    return false;
  }

  return true;
}

H
Haojun Liao 已提交
2080
// evaluates to true if the result code of query _q is TSDB_CODE_TSC_QUERY_CANCELLED
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
2081

2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100
/*
 * Check whether the execution of the query has to stop.
 *
 * Returns true if the query is flagged as cancelled, or if it has an owner, has been executing for
 * longer than getMaximumIdleDurationSec() seconds and no response context is waiting for its result.
 */
static bool isQueryKilled(SQInfo *pQInfo) {
  if (IS_QUERY_KILLED(pQInfo)) {
    return true;
  }

  // query has been executed more than tsShellActivityTimer, and the retrieve has not arrived
  // abort current query execution.
  if (pQInfo->owner != 0 && ((taosGetTimestampSec() - pQInfo->startExecTs) > getMaximumIdleDurationSec()) &&
      (!needBuildResAfterQueryComplete(pQInfo))) {

    assert(pQInfo->startExecTs != 0);

    // report the threshold that was actually exceeded instead of a fixed value
    qDebug("QInfo:%p retrieve not arrive beyond %d sec, abort current query execution, start:%"PRId64", current:%d", pQInfo,
           (int32_t) getMaximumIdleDurationSec(), pQInfo->startExecTs, taosGetTimestampSec());
    return true;
  }

  return false;
}

H
Haojun Liao 已提交
2101
/* Flag the query as cancelled by storing TSDB_CODE_TSC_QUERY_CANCELLED as its result code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
2102

H
Haojun Liao 已提交
2103 2104 2105
/*
 * Returns true for queries treated as fixed-output: never for interval queries, always for
 * top/bottom and group-by-column queries, and otherwise as soon as one expression (ignoring the
 * leading timestamp projection and ts/ts_dummy functions) is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyColumn) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[k].base;

    // ignore the ts_comp function
    bool leadingTsProjection = (k == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool timestampFunc = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || timestampFunc) {
      continue;
    }

    if (IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      continue;
    }

    return true;
  }

  return false;
}

2135
// todo refactor with isLastRowQuery
2136
/* Returns true if any output expression of the query is the interp function. */
bool isPointInterpoQuery(SQuery *pQuery) {
  bool found = false;

  for (int32_t k = 0; k < pQuery->numOfOutput && !found; ++k) {
    found = (pQuery->pExpr1[k].base.functionId == TSDB_FUNC_INTERP);
  }

  return found;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
2148
/* Returns true if any output expression is one of sum_rate, sum_irate, avg_rate or avg_irate. */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pExpr1[k].base.functionId;

    bool isSumRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE);
    bool isAvgRate = (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);

    // the ts function is skipped explicitly
    if (fid != TSDB_FUNC_TS && (isSumRate || isAvgRate)) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
2164
/* Returns true if any output expression of the query is the last_row function. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t k = 0;

  while (k < pQuery->numOfOutput) {
    if (pQuery->pExpr1[k].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++k;
  }

  return false;
}

H
hjxilinx 已提交
2175
/*
 * Returns true if the query needs an additional scan in reverse order: a first/first_dst function
 * in a descending query, or a last/last_dst function whose scan order (its first argument) differs
 * from the query order. ts, ts_dummy and tag expressions are ignored.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pExpr1[k].base.functionId;

    bool ignored = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (ignored) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the scan order to acquire the last result of the specified column
      int32_t scanOrder = (int32_t)pQuery->pExpr1[k].base.arg[0].argValue.i64;
      if (scanOrder != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
2197

H
Haojun Liao 已提交
2198 2199 2200 2201
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
2202 2203
/*
 * Returns true if every output expression is one of: tagprj, tid_tag, count() on the table name
 * column, or a projection of a user-defined column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SExprInfo* pExpr = &pQuery->pExpr1[k];
    int32_t    fid = pExpr->base.functionId;

    bool tagProjection = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG);
    bool countTbname   = (fid == TSDB_FUNC_COUNT) && (pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool userDefined   = (fid == TSDB_FUNC_PRJ) && TSDB_COL_IS_UD_COL(pExpr->base.colInfo.flag);

    if (!(tagProjection || countTbname || userDefined)) {
      return false;
    }
  }

  return true;
}

2219 2220
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
2221
/*
 * Fill *win with the interval-aligned time window for the given key.
 *
 * win->skey is the result of taosTimeTruncate() applied to key with the query interval
 * and precision. win->ekey is chosen as follows:
 *  - INT64_MAX when keyFirst is so large that adding one interval would overflow; in that
 *    case [keyFirst, keyLast] is asserted to be shorter than one interval;
 *  - taosTimeAdd(skey, interval, unit, precision) - 1 for the 'n' and 'y' interval units;
 *  - skey + interval - 1 for every other unit.
 *
 * key must lie in [keyFirst, keyLast] and the sliding value must not exceed the interval.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);

  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  // the end of the window cannot be represented, clamp it to the largest timestamp
  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  const bool calendarUnit = (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y');
  if (calendarUnit) {
    win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + pQuery->interval.interval - 1;
}

/*
 * Set pQuery->checkResultBuf.
 *
 * The flag is cleared for top/bottom queries and for group-by-normal-column queries.
 * Otherwise it is set to 1 only if at least one non-timestamp output function exists and
 * every non-timestamp output function inspected is flagged as multi-output in aAggs[];
 * the inspection stops at the first function that is not.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkResultBuf = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;

    // timestamp columns do not take part in the decision
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  pQuery->checkResultBuf = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
2265
/*
 * Validate the list of columns to load: two adjacent entries of pQuery->colList must not
 * share the same column id. Logs an error and returns false on the first duplicate pair,
 * returns true otherwise.
 *
 * NOTE(review): the log argument GET_QINFO_ADDR(pQuery) applies the macro to a SQuery
 * pointer, whereas the macro subtracts offsetof(SQInfo, runtimeEnv); the printed address
 * is probably not the real SQInfo address -- confirm.
 */
bool colIdCheck(SQuery *pQuery) {
  for (int32_t next = 1; next < pQuery->numOfCols; ++next) {
    if (pQuery->colList[next - 1].colId != pQuery->colList[next].colId) {
      continue;
    }

    qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
    return false;
  }

  return true;
}

// todo: ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Returns true when every output function, ignoring ts/ts_dummy/tag/tag_dummy columns,
 * is either functId or functIdDst. Also returns true when there is nothing left to check.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pExpr1[idx].base.functionId;

    const bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) ||
                           (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAG_DUMMY);
    const bool expected = (fid == functId) || (fid == functIdDst);

    if (!auxiliary && !expected) {
      return false;
    }
  }

  return true;
}

// True when all output functions, apart from the ts/tag columns, are first or first_dst.
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// True when all output functions, apart from the ts/tag columns, are last or last_dst.
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
2300
// todo refactor, add iterator
2301 2302
/*
 * Walk every table of every group in pQInfo->tableGroupInfo.pGroupList and, for each table
 * whose lastKey equals win->ekey, reset lastKey to win->skey.
 *
 * The callers in this file invoke it right after swapping the start/end keys of the query
 * window, so win->ekey then holds the previous start key.
 */
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t numOfGroups = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);

  for (size_t g = 0; g < numOfGroups; ++g) {
    SArray* pTables = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);
    size_t  numOfTables = taosArrayGetSize(pTables);

    for (size_t t = 0; t < numOfTables; ++t) {
      STableKeyInfo* pKeyInfo = taosArrayGet(pTables, t);

      if (pKeyInfo->lastKey != win->ekey) {
        continue;
      }

      pKeyInfo->lastKey = win->skey;
    }
  }
}

2318
/*
 * Force the scan order of the query to newOrder.
 *
 * When the current order differs from newOrder the change is logged, the start/end keys of
 * the query window are swapped and, if updateLastKeys is set, the lastKey of the tables is
 * adjusted through doExchangeTimeWindow(). The order field is always assigned newOrder.
 *
 * queryType is only used in the log message.
 */
static void exchangeQueryRangeForOrder(SQInfo *pQInfo, const char *queryType, int32_t newOrder, bool updateLastKeys) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool isAsc = QUERY_IS_ASC_QUERY(pQuery);
  bool orderChanged = (newOrder == TSDB_ORDER_ASC) ? !isAsc : isAsc;

  if (orderChanged) {
    qDebug("QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
           "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64,
           pQInfo, queryType, pQuery->order.order, newOrder, pQuery->window.skey, pQuery->window.ekey,
           pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    if (updateLastKeys) {
      doExchangeTimeWindow(pQInfo, &pQuery->window);
    }
  }

  pQuery->order.order = newOrder;
}

/*
 * Adjust the scan order (and the query time window accordingly) for query types whose
 * result depends on, or is cheaper to compute in, a specific scan direction:
 *  - first/last row query: ascending order, window keys put in ascending order;
 *  - group by normal column in descending order: switched to ascending order;
 *  - point interpolation query without interval: ascending order;
 *  - queries made only of first()/first_dst(): ascending order;
 *  - queries made only of last()/last_dst(): descending order.
 * For interval queries the last two rules are applied to super table queries only.
 *
 * pQueryMsg is not used by this function.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // todo handle the case that order-irrelevant query types are mixed with order-critical ones
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan; table last keys are left untouched
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    exchangeQueryRangeForOrder(pQInfo, "interp", TSDB_ORDER_ASC, false);
    return;
  }

  const bool intervalQuery = (pQuery->interval.interval != 0);
  if (intervalQuery && !stableQuery) {
    return;  // interval query on a single table: keep the requested order
  }

  if (onlyFirstQuery(pQuery)) {
    exchangeQueryRangeForOrder(pQInfo, intervalQuery ? "only-first stable" : "only-first", TSDB_ORDER_ASC, true);
  } else if (onlyLastQuery(pQuery)) {
    exchangeQueryRangeForOrder(pQInfo, intervalQuery ? "only-last stable" : "only-last", TSDB_ORDER_DESC, true);
  }
}

/*
 * Return the initial number of result pages for the query:
 *  - 128 for a group-by-normal-column query;
 *  - max(number of tables, 16) for an interval (time window) query;
 *  - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t pages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    pages = 1;  // for super table query, one page for each subset
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(pages > 0);
  return pages;
}

2428 2429
/*
 * Compute the layout of the intermediate result buffer.
 *
 * Outputs:
 *  - *rowsize: pQuery->rowSize multiplied by GET_ROW_PARAM_FOR_MULTIOUTPUT(...);
 *  - *ps: page size, starting at DEFAULT_INTERN_BUF_PAGE_SIZE and doubled until a page,
 *    minus the tFilePage header, can hold at least 4 rows;
 *  - pRuntimeEnv->numOfRowsPerPage: number of rows that fit in one page, asserted not to
 *    exceed MAX_ROWS_PER_RESBUF_PAGE.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t minRowsPerPage = 4;
  int32_t headerSize = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // grow the page until the payload area holds the minimum number of rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * minRowsPerPage) > (*ps) - headerSize; *ps = (*ps << 1u)) {
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2445
// true for every data type except binary and nchar; used to decide whether a filter can be
// evaluated against the min/max block statistics before the block is loaded
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2446

H
Haojun Liao 已提交
2447 2448 2449 2450
/*
 * Decide, from the pre-computed block statistics, whether the data block has to be loaded.
 *
 * Returns true (load the block) when:
 *  - no statistics are available, or there are neither filter columns nor a top/bottom query;
 *  - a filter column has no statistics entry, or is of binary/nchar type;
 *  - a filter column contains only NULL values and one of its filters is the isNull operator;
 *  - any filter accepts the [min, max] range of its column.
 * If none of the filters matched and this is a top/bottom query, the result of
 * topbot_datablock_filter() for the first top/bottom output column is returned.
 * Returns false in all remaining cases.
 *
 * NOTE(review): the top/bottom branch indexes pDataStatis with the output column index,
 * while the filter branch looks the entry up by column id -- confirm this is intended.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && !pRuntimeEnv->topBotQuery) {
    return true;
  }

  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[f];

    // locate the statistics entry of the filtered column
    int32_t statIdx = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pFilterInfo->info.colId) {
        statIdx = c;
        break;
      }
    }

    // no statistics for this column, or the type does not support pre-filtering
    if (statIdx == -1 || !IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
      return true;
    }

    SDataStatis* pColStatis = &pDataStatis[statIdx];

    // all data in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      // only an isNull filter can match such a column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        if (pFilterInfo->pFilters[j].fp == isNullOperator) {
          return true;
        }
      }

      continue;
    }

    // float columns: the min/max slots are read as double and narrowed to float first
    float fMin = 0;
    float fMax = 0;
    char *pMin = (char *)&pColStatis->min;
    char *pMax = (char *)&pColStatis->max;

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
      fMin = (float)(*(double *)(&pColStatis->min));
      fMax = (float)(*(double *)(&pColStatis->max));
      pMin = (char *)&fMin;
      pMax = (char *)&fMax;
    }

    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pElem = &pFilterInfo->pFilters[j];
      if (pElem->fp(pElem, pMin, pMax, pFilterInfo->info.type)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pExpr1[i].base.functionId;
      if (functionId != TSDB_FUNC_TOP && functionId != TSDB_FUNC_BOTTOM) {
        continue;
      }

      return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
2521 2522 2523 2524 2525 2526 2527 2528
/*
 * Check whether the data block straddles a time window boundary, i.e. whether it is not
 * completely contained in a single time window.
 *
 * The first window is aligned on the block start key (ascending scan) or on the block end
 * key (descending scan); the following windows are produced by getNextTimeWindow().
 * Returns true as soon as a window boundary is found inside the block, false once the
 * windows have moved past the block without finding one.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  const TSKEY rangeStart = MIN(pQuery->window.skey, pQuery->window.ekey);
  const TSKEY rangeEnd = MAX(pQuery->window.skey, pQuery->window.ekey);

  const TSKEY blockStart = pBlockInfo->window.skey;
  const TSKEY blockEnd = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockStart, rangeStart, rangeEnd, &w);
    assert(w.ekey >= blockStart);

    // the first window ends inside the block
    if (w.ekey < blockEnd) {
      return true;
    }

    for (;;) {
      getNextTimeWindow(pQuery, &w);
      if (w.skey > blockEnd) {
        return false;
      }

      assert(w.ekey > blockEnd);
      // here w.skey <= blockEnd already holds; a start key after the block start is a boundary
      if (w.skey > blockStart) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, blockEnd, rangeStart, rangeEnd, &w);
  assert(w.skey <= blockEnd);

  // the first window starts inside the block
  if (w.skey > blockStart) {
    return true;
  }

  for (;;) {
    getNextTimeWindow(pQuery, &w);
    if (w.ekey < blockStart) {
      return false;
    }

    assert(w.skey < blockStart);
    // here w.ekey >= blockStart already holds; an end key before the block end is a boundary
    if (w.ekey < blockEnd) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2570
/*
 * Load the statistics and/or the data of the current block, depending on what the query
 * actually needs.
 *
 * On return *status holds one of:
 *  - BLK_DATA_NO_NEEDED:     nothing is loaded, the block is counted as discarded;
 *  - BLK_DATA_STATIS_NEEDED: *pStatis is loaded; if no statistics exist the data block is
 *                            loaded into *pDataBlock instead;
 *  - BLK_DATA_ALL_NEEDED:    *pStatis and *pDataBlock are both loaded;
 *  - BLK_DATA_DISCARD:       the block was rejected by the filters applied on the block
 *                            statistics; the data block is NOT loaded.
 *
 * Returns TSDB_CODE_SUCCESS, or terrno when the data block could not be retrieved.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  *status = BLK_DATA_NO_NEEDED;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  int64_t groupId = pQuery->current->groupIndex;

  SQueryCostInfo* pCost = &pRuntimeEnv->summary;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL) {
    // filters and ts-buffer driven queries always need the complete block
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // A block that crosses a time window boundary has to be split between windows,
    // which requires the actual rows.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        SResultRow* pResult = NULL;

        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;
        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId) != TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the data requirement of every output function; stop once all data is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pExpr1[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pCost->discardBlocks += 1;
    return TSDB_CODE_SUCCESS;
  }

  if ((*status) == BLK_DATA_STATIS_NEEDED) {
    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);
    pCost->loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pCost->totalCheckedRows += pBlockInfo->rows;

      // same failure handling as for the BLK_DATA_ALL_NEEDED case below
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }

    return TSDB_CODE_SUCCESS;
  }

  assert((*status) == BLK_DATA_ALL_NEEDED);

  // load the data block statistics to perform further filter
  pCost->loadBlockStatis += 1;
  tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

  if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
    // current block has been discard due to filter applied; do not pay for loading its rows
    pCost->discardBlocks += 1;
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
        pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    (*status) = BLK_DATA_DISCARD;
    return TSDB_CODE_SUCCESS;
  }

  pCost->totalCheckedRows += pBlockInfo->rows;
  pCost->loadBlocks += 1;
  *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  if (*pDataBlock == NULL) {
    return terrno;
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2656
/*
 * Binary search of a timestamp in an array of TSKEY values.
 * The search logic assumes the array is sorted in ascending order.
 *
 * pValue: the TSKEY array, passed as a char pointer
 * num:    number of elements in the array
 * key:    the timestamp to look for
 * order:  TSDB_ORDER_ASC or TSDB_ORDER_DESC
 *
 * TSDB_ORDER_DESC: returns the index of the last element that is <= key,
 *                  or -1 when every element is greater than key.
 * TSDB_ORDER_ASC:  returns the index of the first element that is >= key,
 *                  or -1 when every element is smaller than key.
 * Also returns -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }

      int32_t span = hi - lo + 1;
      int32_t mid = (span >> 1) + lo;

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }

    if (key == keys[hi]) {
      return hi;
    }

    if (key > keys[hi]) {
      // the candidate is the element right after the current upper bound, if there is one
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    int32_t span = hi - lo + 1;
    int32_t mid = (span >> 1u) + lo;

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2719 2720 2721 2722 2723 2724 2725 2726
/*
 * Resize every output column buffer so that it can hold `capacity` rows.
 *
 * Nothing is done when `capacity` is smaller than the current capacity. Otherwise each
 * pQuery->sdata[i] is reallocated to (bytes per row * capacity + page header) bytes and the
 * output position of the matching function context is reset to the beginning of the buffer.
 * On allocation failure the function does not return: it jumps to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity may not fit in a 32-bit signed integer
    size_t bufSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], bufSize);
    if (tmp == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2747
// TODO merge with ensureOutputBufferSimple
2748 2749 2750
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to accommodate the
 * results of the current block.
 *
 * Interval queries, group-by-column queries, fixed-output queries and ts-comp queries are
 * left untouched. For the other queries, when the free room (capacity - rows) is smaller
 * than the number of rows of the block, every output column buffer is grown by the missing
 * number of rows, the newly added area is zeroed, and the output positions of the function
 * contexts are moved to the first free row. On allocation failure the function does not
 * return: it jumps to pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyColumn || isFixedOutputQuery(pRuntimeEnv) || isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // compute the size in size_t: bytes * newSize may not fit in a 32-bit signed integer
    size_t bufSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], bufSize);
    if (tmp == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything located after the rows that are already filled
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // top/bottom/diff write their timestamps into the buffer of the first output column
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2787 2788 2789 2790 2791
/*
 * For an interval query whose windowResInfo.prevSKey still holds TSKEY_INITIAL_VAL,
 * compute the aligned time window of the first block and store its start key in prevSKey.
 *
 * Ascending scan:  the window is aligned on the block start key, within
 *                  [block start key, query end key].
 * Descending scan: the window is aligned on the block end key, within
 *                  [query end key, block end key].
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->windowResInfo.prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow     firstWin = TSWINDOW_INITIALIZER;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &firstWin);
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &firstWin);
  }

  pWindowResInfo->prevSKey = firstWin.skey;
}

2804 2805
/*
 * Scan the data blocks delivered by the query handle of the current scan (master scan:
 * pQueryHandle, otherwise pSecQueryHandle) and apply the query functions on each of them.
 *
 * The loop stops when there is no block left, when loading a block fails, or when the
 * query status reports a full result buffer or a completed query. A killed query or a
 * non-zero terrno after the loop makes the function jump to pRuntimeEnv->env instead of
 * returning. If the result buffer is not full the query is marked as completed, and for
 * interval queries all result rows are closed.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo *pSummary = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pQuery->current->win.skey, pQuery->current->win.ekey, pQuery->current->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    pSummary->totalBlocks += 1;

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray *     pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQueryHandle, &blockInfo, &pStatis, &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // skip the block, but move the last key of the table past it
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pSummary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // an error reported through terrno aborts the query
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    closeAllResultRows(&pRuntimeEnv->windowResInfo);
    pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2880
/*
 * Store the value of one tag of pTable into the variant `tag`.
 *
 * The previous content of `tag` is destroyed first. For the tbname pseudo column the table
 * name is stored as a binary value. For a regular tag the value is read with
 * tsdbGetTableTagVal(); a missing or NULL value sets tag->nType to TSDB_DATA_TYPE_NULL.
 * Binary/nchar values are copied using their var-data payload and length, other types are
 * copied as `bytes` raw bytes.
 *
 * The tsdb parameter is not used by this function.
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char* name = tsdbGetTableName(pTable);
    assert(name != NULL);

    tVariantCreateFromBinary(tag, varDataVal(name), varDataLen(name), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
  if (val == NULL || isNull(val, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  const bool varLenType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);
  if (varLenType) {
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
  } else {
    tVariantCreateFromBinary(tag, val, bytes, type);
  }
}

2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924
/*
 * Linear lookup of a tag column by id.
 * Returns a pointer to the first entry of pTagColList whose colId equals colId,
 * or NULL when there is no such entry. The list must be non-NULL and non-empty.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo* pFound = NULL;

  int32_t pos = 0;
  while (pos < numOfTags && pFound == NULL) {
    if (pTagColList[pos].colId == colId) {
      pFound = &pTagColList[pos];
    }

    ++pos;
  }

  return pFound;
}

2925
/*
 * Load the tag values of pTable into the `tag` member of the function contexts
 * (pRuntimeEnv->pCtx[]).
 *
 * Two cases are handled:
 *  - the query has a single TSDB_FUNC_TS_COMP output on a super table: only
 *    the tag named by the first expression argument is loaded into pCtx[0];
 *  - otherwise every output expression flagged as a tag column gets its tag
 *    value, and, when a ts buffer exists and the first expression is a ts/prj
 *    function on the primary timestamp column, the tag named by its argument
 *    is additionally loaded into pCtx[0] and logged.
 *
 * NOTE(review): the result of doGetTagColumnInfoById() is dereferenced without
 * a NULL check in both cases - confirm the tag id is always present.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery     = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo     = GET_QINFO_ADDR(pRuntimeEnv);
  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];

  bool tsCompOnly = (pQuery->numOfOutput == 1) && (pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) &&
                    pRuntimeEnv->stableQuery;
  if (tsCompOnly) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      joinTagId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pTagCol   = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinTagId);

    doSetTagValueInParam(tsdb, pTable, joinTagId, &pRuntimeEnv->pCtx[0].tag, pTagCol->type, pTagCol->bytes);
    return;
  }

  // set the tag values by which the results are aggregated
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SExprInfo *pExpr = &pQuery->pExpr1[col];

    // only expressions on tag columns carry a tag value; todo: use the tag column index to speed this up
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[col].tag, pExpr->type,
                           pExpr->bytes);
    }
  }

  // set the join tag for the first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;

  bool isTsOrPrj = (pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ);
  if (!(isTsOrPrj && pRuntimeEnv->pTsBuf != NULL && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX)) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      joinTagId = (int16_t)pFuncMsg->arg->argValue.i64;
  SColumnInfo *pTagCol   = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinTagId);

  doSetTagValueInParam(tsdb, pTable, joinTagId, &pRuntimeEnv->pCtx[0].tag, pTagCol->type, pTagCol->bytes);

  // string tags are logged as text, everything else through the 64-bit integer member
  int16_t loadedType = pRuntimeEnv->pCtx[0].tag.nType;
  if (loadedType == TSDB_DATA_TYPE_BINARY || loadedType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pFuncMsg->arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pFuncMsg->arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64);
  }
}

2975
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

3044
/*
 * Debug helper: dump numOfRows rows of intermediate results to stdout, one
 * line per row and one tab-terminated field per output column.
 *
 * pdata[i] is the page holding column i; the value of row j is read at
 * offset (bytes of expression i) * j inside pdata[i]->data. Columns whose
 * expression type is not handled below are silently skipped.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pExpr1[col];
      int32_t    type  = pExpr->type;

      if (type == TSDB_DATA_TYPE_BINARY) {
        // binary cells hold function specific intermediate data
        printBinaryData(pExpr->base.functionId, pdata[col]->data + pExpr->bytes * row, type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)(pdata[col]->data + pExpr->bytes * row));
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)(pdata[col]->data + pExpr->bytes * row));
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)(pdata[col]->data + pExpr->bytes * row));
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)(pdata[col]->data + pExpr->bytes * row));
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
3079
  STableQueryInfo **pTableQueryInfo;
3080 3081
  int32_t          *rowIndex;
  int32_t           order;
3082 3083 3084
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
3085
  int32_t left  = *(int32_t *)pLeft;
3086
  int32_t right = *(int32_t *)pRight;
3087

3088
  SCompSupporter *  supporter = (SCompSupporter *)param;
3089

3090 3091
  int32_t leftPos  = supporter->rowIndex[left];
  int32_t rightPos = supporter->rowIndex[right];
3092

3093 3094 3095 3096
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
3097

3098 3099 3100 3101
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
3102

3103
  STableQueryInfo** pList = supporter->pTableQueryInfo;
3104

3105 3106 3107
  SResultRowInfo *pWindowResInfo1 = &(pList[left]->windowResInfo);
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
  TSKEY leftTimestamp = pWindowRes1->win.skey;
3108

3109
  SResultRowInfo *pWindowResInfo2 = &(pList[right]->windowResInfo);
3110
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
3111
  TSKEY rightTimestamp = pWindowRes2->win.skey;
3112

3113 3114 3115
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
3116

3117 3118 3119 3120 3121
  if (supporter->order == TSDB_ORDER_ASC) {
    return (leftTimestamp > rightTimestamp)? 1:-1;
  } else {
    return (leftTimestamp < rightTimestamp)? 1:-1;
  }
3122 3123
}

3124
/*
 * Merge the results of table groups, starting at pQInfo->groupIndex, until a
 * group yields at least one row or all groups have been consumed.
 *
 * groupIndex is advanced past every group that was merged. When the last
 * group has been merged and no rows are pending, the super table query is
 * marked as over. The elapsed time is added to summary.firstStageMergeTime.
 *
 * Returns TSDB_CODE_SUCCESS, or the error code of mergeIntoGroupResultImpl()
 * (in which case neither the log line nor the time accounting is done).
 */
int32_t mergeGroupResult(SQInfo *pQInfo) {
  int64_t        startUs = taosGetTimestampUs();
  SGroupResInfo *pGroupResInfo = &pQInfo->groupResInfo;
  int32_t        numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t code = mergeIntoGroupResultImpl(pGroupResInfo, group, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    bool hasRows = taosArrayGetSize(pGroupResInfo->pRows) > 0;
    if (hasRows) {
      break;
    }

    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);

    // reset the (empty) group result before trying the next group
    taosArrayClear(pGroupResInfo->pRows);
    pGroupResInfo->index = 0;
    pGroupResInfo->rowId = 0;
  }

  bool allGroupsDone = (pQInfo->groupIndex == numOfGroups);
  if (allGroupsDone && taosArrayGetSize(pGroupResInfo->pRows) == 0) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

3163 3164
static int32_t doCopyToSData(SQInfo *pQInfo, SResultRow **pRows, int32_t numOfRows, int32_t* index, int32_t orderType);

3165
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
H
Haojun Liao 已提交
3166 3167
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

3168 3169
  // all results in current group have been returned to client, try next group
  if (pGroupResInfo->index >= taosArrayGetSize(pGroupResInfo->pRows)) {
3170
    // current results of group has been sent to client, try next group
3171
    if (mergeGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
3172 3173
      return;  // failed to save data in the disk
    }
3174

3175
    // check if all results has been sent to client
S
TD-1057  
Shengliang Guan 已提交
3176
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
3177
    if (taosArrayGetSize(pGroupResInfo->pRows) == 0 && pQInfo->groupIndex == numOfGroup) {
H
Haojun Liao 已提交
3178
      SET_STABLE_QUERY_OVER(pQInfo);
3179 3180
      return;
    }
3181
  }
3182

3183 3184
  int32_t size = (int32_t) taosArrayGetSize(pGroupResInfo->pRows);
  pQuery->rec.rows = doCopyToSData(pQInfo, pGroupResInfo->pRows->pData, (int32_t) size, &pGroupResInfo->index, TSDB_ORDER_ASC);
3185 3186
}

3187 3188 3189
/*
 * Return the number of results held by a result row: the numOfRes of the
 * first output column that has a positive count, or 0 if there is none.
 *
 * ts, tag and tagprj columns are ignored, since they can not decide the
 * output number of the query; that number is decided by the main output.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pExpr1[col].base.functionId;

    bool auxiliaryColumn =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliaryColumn) {
      SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResultRow, col);
      assert(pCell != NULL);

      if (pCell->numOfRes > 0) {
        return pCell->numOfRes;
      }
    }
  }

  return 0;
}

3212
/*
 * Merge the result rows of all tables of one group into pGroupResInfo->pRows,
 * ordered by the start timestamp of the rows (ascending or descending,
 * following the order of the query).
 *
 * pGroupResInfo  receives pointers to the merged SResultRow objects; its pRows
 *                array is created on first use
 * pTableList     array of STableQueryInfo pointers forming the group
 * pQInfo         owning query; used for the kill check, logging and ordering
 *
 * Only tables with at least one result row take part in the merge. Rows
 * without results are skipped, and a row whose start timestamp equals the one
 * handled just before it is not appended again.
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation or
 * the loser tree creation fails, or TSDB_CODE_TSC_QUERY_CANCELLED when the
 * query is killed during the merge.
 */
int32_t mergeIntoGroupResultImpl(SGroupResInfo* pGroupResInfo, SArray *pTableList, SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;  // also needed by the _DEBUG_VIEW dump below
  bool ascQuery = QUERY_IS_ASC_QUERY(pQuery);

  int32_t code = TSDB_CODE_SUCCESS;

  int32_t *posList = NULL;
  SLoserTreeInfo *pTree = NULL;
  STableQueryInfo **pTableQueryInfoList = NULL;

  size_t size = taosArrayGetSize(pTableList);
  if (pGroupResInfo->pRows == NULL) {
    pGroupResInfo->pRows = taosArrayInit(100, POINTER_BYTES);
  }

  // posList is zero-initialized: every source starts at its first row
  posList = calloc(size, sizeof(int32_t));
  pTableQueryInfoList = malloc(POINTER_BYTES * size);

  if (pTableQueryInfoList == NULL || posList == NULL || pGroupResInfo->pRows == NULL) {
    qError("QInfo:%p failed alloc memory", pQInfo);
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _end;
  }

  // collect the tables that actually produced results; they are the merge sources
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pTableList, i);
    if (item->windowResInfo.size > 0) {
      pTableQueryInfoList[numOfTables++] = item;
    }
  }

  // there is no data in current group, nothing to merge
  if (numOfTables == 0) {
    goto _end;
  }

  SCompSupporter cs = {pTableQueryInfoList, posList, pQuery->order.order};

  int32_t ret = tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (ret != TSDB_CODE_SUCCESS) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _end;
  }

  int64_t lastTimestamp = ascQuery? INT64_MIN:INT64_MAX;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (isQueryKilled(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);
      code = TSDB_CODE_TSC_QUERY_CANCELLED;
      goto _end;
    }

    // the root of the loser tree identifies the source holding the next row in order
    int32_t tableIndex = pTree->pNode[0].index;

    SResultRowInfo *pWindowResInfo = &pTableQueryInfoList[tableIndex]->windowResInfo;
    SResultRow  *pWindowRes = getResultRow(pWindowResInfo, cs.rowIndex[tableIndex]);

    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num <= 0) {
      // empty row: just move to the next row of this source
      cs.rowIndex[tableIndex] += 1;

      if (cs.rowIndex[tableIndex] >= pWindowResInfo->size) {
        cs.rowIndex[tableIndex] = -1;
        if (--numOfTables == 0) { // all input sources are exhausted
          break;
        }
      }
    } else {
      assert((pWindowRes->win.skey >= lastTimestamp && ascQuery) || (pWindowRes->win.skey <= lastTimestamp && !ascQuery));

      // a row with the same start timestamp as the previous one is not appended again
      if (pWindowRes->win.skey != lastTimestamp) {
        taosArrayPush(pGroupResInfo->pRows, &pWindowRes);
        pWindowRes->numOfRows = (uint32_t) num;
      }

      lastTimestamp = pWindowRes->win.skey;

      // move to the next row of current entry
      if ((++cs.rowIndex[tableIndex]) >= pWindowResInfo->size) {
        cs.rowIndex[tableIndex] = -1;

        // all input sources are exhausted
        if ((--numOfTables) == 0) {
          break;
        }
      }
    }

    tLoserTreeAdjust(pTree, tableIndex + pTree->numOfEntries);
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  _end:
  tfree(pTableQueryInfoList);
  tfree(posList);
  tfree(pTree);

  return code;
}

3323 3324 3325 3326
/*
 * Flip the scan state of one table for the reverse scan: swap the start/end
 * keys of its time window, restart lastKey from the new start key, switch the
 * order of its cursor and reset the cursor's vgroup index, and move the
 * current window index to the last result window.
 * A NULL pTableQueryInfo is ignored; pQuery is not referenced.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

    SWITCH_ORDER(pTableQueryInfo->cur.order);
    pTableQueryInfo->cur.vgroupIndex = -1;

    // set the index at the end of time window
    SResultRowInfo *pResInfo = &pTableQueryInfo->windowResInfo;
    pResInfo->curIndex = pResInfo->size - 1;
  }
}

H
Haojun Liao 已提交
3338
/*
 * For every closed result row of pWindowResInfo, set the `complete` flag of
 * each output cell for the reverse scan:
 *  - first/first_dst with ascending order and last/last_dst with descending
 *    order are marked as not complete;
 *  - every other function except ts and tag is marked as complete;
 *  - ts and tag cells are left untouched.
 * Rows that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t slot = 0; slot < pWindowResInfo->size; ++slot) {
    if (!getResultRowStatus(pWindowResInfo, slot)) {
      continue;
    }

    SResultRow *pRow = getResultRow(pWindowResInfo, slot);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functId = pQuery->pExpr1[col].base.functionId;
      SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pRow, col);

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pCell->complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pCell->complete = true;
      }
    }
  }
}

3364 3365
/*
 * Prepare the `complete` flags of the output functions for the reverse scan.
 *
 * Group-by-column and interval queries are handled per result row by
 * disableFuncInReverseScanImpl(). For all other queries the flag of each
 * function context is set directly: first/first_dst (ascending order) and
 * last/last_dst (descending order) become not complete, every other function
 * except ts and tag becomes complete. Contexts without a resultInfo are
 * skipped.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pRuntimeEnv, pWindowResInfo, order);
    return;
  }

  // for simple result of table query; todo refactor
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functId = pQuery->pExpr1[col].base.functionId;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    if (pCtx->resultInfo == NULL) {
      continue; // resultInfo is NULL, means no data checked in previous scan
    }

    bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Switch every table of every group to the reverse scan range and copy the
 * resulting lastKey into the matching STableKeyInfo of
 * pQInfo->tableGroupInfo. The table key list is used to build the
 * tsdbQueryHandle and decides its start check timestamp.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pQueryInfoGroup = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pQueryInfoGroup);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pQueryInfoGroup, k);
      updateTableQueryInfoForReverseScan(pQuery, pTableQueryInfo);

      // both lists are expected to describe the same table at the same position
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyGroup, k);
      pKeyInfo->lastKey = pTableQueryInfo->lastKey;

      assert(pTableQueryInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3415
/* Apply SWITCH_ORDER to the order field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  int32_t col = 0;
  while (col < numOfOutput) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
    col += 1;
  }
}

3422
/*
 * Initialize a result row: point pCellInfo at the memory directly following
 * the SResultRow structure and mark the row as not bound to any page/row
 * (pageId and rowId are set to -1).
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t initResultRow(SResultRow *pResultRow) {
  // the cell info area starts right behind the row header
  pResultRow->pCellInfo = (SResultRowCellInfo*)(pResultRow + 1);

  pResultRow->rowId  = -1;
  pResultRow->pageId = -1;

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3429
/*
 * Rebind every function context to the start of its output buffer
 * (pQuery->sdata[i]->data), reset and attach the result cells of the result
 * row obtained for the key tid == 0, clear the output buffers and finally
 * re-run the function initialization through initCtxOutputBuf().
 */
void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t tid = 0;
  int64_t uid = 0;
  SResultRow* pRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&tid, sizeof(tid), true, uid);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pRow, col);
    RESET_RESULT_INFO(pCell);
    pCtx->resultInfo = pCell;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    bool    needTsBuf  = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pExpr1[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of every function context by `output` rows and
 * reset its result info. Must not be used with the diff function, which is
 * handled in the multi-output function path.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine. The output buffer in top/bottom routine is
     * ptsOutputBuf, so it has to be forwarded as well.
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and call the init
 * routine of the function for each context whose result info has not been
 * initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t functionId = pQuery->pExpr1[col].base.functionId;

    pCtx->currentStage = 0;

    SResultRowCellInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3504
/*
 * Apply the pending offset (pQuery->limit.offset) to the rows currently held
 * in the output buffers.
 *
 *  - No rows or no offset: nothing is done.
 *  - More rows than the offset: the first `offset` rows are dropped by moving
 *    the remaining rows to the front of every output buffer, the output
 *    positions are placed behind the remaining rows and the offset becomes 0.
 *  - Otherwise: all rows are discarded, the offset is reduced by the number
 *    of discarded rows, the output buffers are reset and the
 *    QUERY_RESBUF_FULL status is cleared.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool nothingToSkip = (pQuery->rec.rows == 0) || (pQuery->limit.offset == 0);
  if (nothingToSkip) {
    return;
  }

  if (pQuery->rec.rows > pQuery->limit.offset) {
    // only the leading part of the current rows is skipped
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, pQuery->rec.rows);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

      int32_t functionId = pQuery->pExpr1[col].base.functionId;
      int32_t bytes = pCtx->outputBytes;

      // source and destination overlap, hence memmove
      char *pData = (char*)pQuery->sdata[col]->data;
      memmove(pQuery->sdata[col]->data, pData + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
      pCtx->aOutputBuf = pData + pQuery->rec.rows * bytes;

      // the timestamp output follows the (already updated) position of the first column
      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
    return;
  }

  // all current rows fall into the offset range and are discarded
  qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
      pQuery->limit.offset - pQuery->rec.rows);

  pQuery->limit.offset -= pQuery->rec.rows;
  pQuery->rec.rows = 0;

  resetDefaultResInfoOutputBuf(pRuntimeEnv);

  // clear the buffer full flag if exists
  CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
}

/*
 * Update the status flags of the query.
 * QUERY_NOT_COMPLETED replaces the whole status; any other status is OR-ed
 * into the existing flags after QUERY_NOT_COMPLETED has been cleared, since
 * QUERY_NOT_COMPLETED is not compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Run the xNextStep handler of every output function (ts columns excepted)
 * and report whether at least one of them is still not complete, i.e. whether
 * the data blocks have to be scanned again.
 *
 * For group-by-column and interval queries this is done for every result row,
 * after binding the row through setResultOutputBuf(); otherwise it is done
 * once on the function contexts. All handlers are always invoked, the result
 * is not short-circuited.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    scanAgain = false;

  if (!(pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pExpr1[col].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
      SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

      if (!pResInfo->complete) {
        scanAgain = true;
      }
    }

    return scanAgain;
  }

  // for each group result, advance every column to its next step
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t slot = 0; slot < pWindowResInfo->size; ++slot) {
    SResultRow *pResult = getResultRow(pWindowResInfo, slot);
    setResultOutputBuf(pRuntimeEnv, pResult);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pExpr1[col].base.functionId;
      if (functId == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
      SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

      if (!pResInfo->complete) {
        scanAgain = true;
      }
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3596
/*
 * Take a snapshot of the query state: the status flags, the current window
 * index, the given start key (stored as lastKey) and the query time window.
 * `start` must not lie beyond the lastKey of the current table in scan
 * direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  SQueryStatusInfo snapshot = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
  };

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  return snapshot;
}

3613 3614 3615 3616
/*
 * Switch the runtime environment to the reverse scan.
 *
 * The ts buffer cursor is saved into pStatus->cur; the scan order of the ts
 * buffer, the query and the function contexts is reversed; the query window
 * is swapped; the function completion flags and the per-table ranges are
 * prepared; and a new secondary query handle is opened for the reversed
 * window (an existing one is cleaned up first).
 *
 * Does not return if the secondary query handle can not be created: control
 * is transferred with longjmp(pRuntimeEnv->env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor, then turn the ts buffer around and step to its next position
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
  if (pRuntimeEnv->pTsBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order);
    bool ret = tsBufNextPos(pRuntimeEnv->pTsBuf);
    assert(ret);
  }

  // reverse order time range
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3653 3654
/*
 * Undo the changes made for the reverse scan: switch the order of the query
 * and the function contexts back, restore the ts buffer cursor saved in
 * pStatus, set the master scan flag and restore lastKey, status and time
 * window of the current table/query from the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTsBuf, &pStatus->cur);
  if (pRuntimeEnv->pTsBuf) {
    // the ts buffer follows the (restored) order of the query again
    pRuntimeEnv->pTsBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the scan progress and status recorded before the reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status    = pStatus->status;

  // restore the time window of the table and use it as the query window
  pCurrent->win  = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3675 3676 3677 3678 3679 3680 3681
/**
 * Reset the lastKey of the only table in the group list to the start key of the query condition window.
 * The group info must contain exactly one table.
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray*        pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3682
/**
 * Scan the data blocks of the current table: one master scan, repeated scans while needScanDataBlocksAgain() asks for
 * them, and finally one reverse scan when needReverseScan() requires it.
 *
 * Errors (failure to create a query handle, killed query) are reported through longjmp on pRuntimeEnv->env.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key handed to getQueryStatusInfo() to build the saved status
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // remember where the scan starts, it is needed by the repeat/reverse scans
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);
  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  if (!pRuntimeEnv->groupbyColumn && pRuntimeEnv->hasTagResults) {
    setTagVal(pRuntimeEnv, pCurrent->pTable, pQInfo->tsdb);
  }

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (pRuntimeEnv->scanFlag == MASTER_SCAN) {
      saved.status = pQuery->status;

      // lastKey did not move: nothing qualified during this scan
      if (saved.lastKey == pCurrent->lastKey) {
        qDebug("QInfo:%p no results generated in this scan", pQInfo);
      }
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // leaving the loop after a repeat scan: put back the status recorded by the master scan
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    // drop the previous secondary handle before a new one is created for the repeat scan
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // abort if the query has been killed in the meantime
    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from the current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3755
/**
 * Invoke the finalize callback of every output function.
 *
 * For group-by-column or interval queries this is done once per closed result row (for group-by-column queries all
 * rows are closed first) and the number of rows of that result row is recorded. For every other query the finalize
 * callbacks are invoked once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SResultRowInfo *pRowInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyColumn) {
    closeAllResultRows(pRowInfo);
  }

  for (int32_t row = 0; row < pRowInfo->size; ++row) {
    if (!isResultRowClosed(pRowInfo, row)) {
      continue;  // rows that are still open are not finalized
    }

    SResultRow *pRow = pRowInfo->pResult[row];
    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    // Record the number of output rows of this result row; according to the original note it is usually 1 for
    // group by normal columns, except for top/bottom queries.
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/**
 * Check whether at least one output expression uses a function other than TSDB_FUNC_TS, TSDB_FUNC_TAG and
 * TSDB_FUNC_TAGPRJ.
 *
 * @return true if such an output expression exists, false otherwise
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3803
/**
 * Initialize a STableQueryInfo in the caller supplied storage.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table object stored in the new info
 * @param win          time window of the table; lastKey starts at win.skey
 * @param buf          storage that is used as the STableQueryInfo object
 * @return buf as STableQueryInfo, or NULL if the result row info could not be initialized
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  // windowResInfo is only initialized for interval/group-by-column queries, other queries leave it untouched
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyColumn) {
    int32_t initialSize = 128;
    if (initResultRowInfo(&pInfo->windowResInfo, initialSize, TSDB_DATA_TYPE_INT) != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3827
/**
 * Release the tag variant and the result row info of a table query info. NULL is accepted and ignored.
 * The STableQueryInfo object itself is not freed here.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupResultRowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3839
 * @param pDataBlockInfo
3840
 */
H
Haojun Liao 已提交
3841
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3842
  SQueryRuntimeEnv *pRuntimeEnv     = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3843
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
3844
  SResultRowInfo   *pWindowResInfo  = &pRuntimeEnv->windowResInfo;
H
Haojun Liao 已提交
3845

H
Haojun Liao 已提交
3846 3847
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3848

H
Haojun Liao 已提交
3849
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
H
Haojun Liao 已提交
3850 3851
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3852

H
Haojun Liao 已提交
3853 3854 3855
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3856

3857
  int64_t uid = 0;
H
Haojun Liao 已提交
3858
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
3859
      sizeof(groupIndex), true, uid);
3860
  assert (pResultRow != NULL);
3861

3862 3863 3864 3865
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
3866 3867
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3868 3869 3870 3871
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3872

H
Haojun Liao 已提交
3873 3874
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
3875
  setResultOutputBuf(pRuntimeEnv, pResultRow);
3876 3877 3878
  initCtxOutputBuf(pRuntimeEnv);
}

3879
/**
 * Point the output buffer and the result cell of every function context to the given result row.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pResult      result row whose page and cells become the output target
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note (kept from the original): pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    // top/bottom/diff take their timestamp output buffer from the output buffer of the first context
    int32_t fid = pQuery->pExpr1[col].base.functionId;
    bool    usesTsOutput = (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF);
    if (usesTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer,
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);
  }
}

H
Haojun Liao 已提交
3902
/**
 * Bind every function context to the given result row and initialize the contexts that are not initialized yet.
 *
 * A context whose result cell is both initialized and complete only gets its resultInfo pointer updated; its output
 * buffer and stage are left as they are.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pResult      result row to bind the contexts to
 */
void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note (kept from the original): pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
    pCtx->currentStage = 0;

    // top/bottom/diff take their timestamp output buffer from the output buffer of the first context
    int32_t fid = pCtx->functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    if (!pCtx->resultInfo->initialized) {
      aAggs[fid].init(pCtx);
    }
  }
}

3930
/**
 * Set the tag values of the table and, if a ts buffer exists, position the ts buffer cursor for this table.
 *
 * When the table has no cursor yet (cur.vgroupIndex == -1) the start position is looked up by vgId and tag value and
 * the resulting cursor is stored in pTableQueryInfo; otherwise the stored cursor is put back into the ts buffer.
 *
 * @param pQInfo           query info object
 * @param pTable           table whose tag values are set
 * @param pTableQueryInfo  per-table query info holding the tag and the ts buffer cursor
 * @return always 0.
 *         NOTE(review): the lookup-failure path of the original code returns `false`, which is 0 as well, so callers
 *         cannot tell failure from success by the return value - confirm whether a distinct error code is wanted.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // without a ts buffer there is no cursor to maintain
  if (pRuntimeEnv->pTsBuf == NULL) {
    return 0;
  }

  // both the master and supplement scan needs to set the correct ts comp start position
  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTsBuf, &pTableQueryInfo->cur);
  } else {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64);
      }

      return 0;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
  }

  // the cursor is positioned now, report it (string tags and numeric tags use different formats)
  if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3986
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3987 3988
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3989
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3990

3991 3992 3993
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3994
    pTableQueryInfo->win.skey = key;
3995
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3996

3997 3998 3999 4000 4001
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
4002

4003 4004 4005
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
4006
     * In ascending query, the key is the first qualified timestamp. However, in the descending order query, additional
4007 4008
     * operations involve.
     */
H
Haojun Liao 已提交
4009
    STimeWindow     w = TSWINDOW_INITIALIZER;
H
Haojun Liao 已提交
4010
    SResultRowInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
4011

H
Haojun Liao 已提交
4012 4013
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
4014
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
4015

4016 4017
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
4018
        assert(win.ekey == pQuery->window.ekey);
4019
      }
4020

4021
      pWindowResInfo->prevSKey = w.skey;
4022
    }
4023

4024
    pTableQueryInfo->queryRangeSet = 1;
4025
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
4026 4027 4028 4029
  }
}

/**
 * Check whether any output function of the query carries the TSDB_FUNCSTATE_NEED_TS flag in its nStatus.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;
    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) {
      return true;
    }

    col++;
  }

  return false;
}

/**
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is required if
 * 1. the lastKey of the current table or the end key of the query window locates in this block, since the timestamp
 *    column is then needed to decide the precise position, or
 * 2. any output function requires the timestamp (see requireTimestamp).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

4053
/**
 * Copy the rows of the given result rows into the output buffers of the query (pQuery->sdata).
 *
 * Result rows are visited in ascending order starting at *index, or in descending order starting at
 * numOfRows - *index - 1. Copying stops when the output capacity (pQuery->rec.capacity) is reached; a partially
 * copied result row is resumed through pQInfo->groupResInfo.rowId on the next call.
 *
 * @param pQInfo     query info object
 * @param pRows      array of result rows
 * @param numOfRows  number of entries in pRows
 * @param index      in/out: number of result rows that have been handled completely
 * @param orderType  TSDB_ORDER_ASC for ascending traversal, anything else for descending traversal
 * @return number of rows copied into the output buffers
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SResultRow **pRows, int32_t numOfRows, int32_t *index, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SGroupResInfo    *pGroupResInfo = &pQInfo->groupResInfo;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  const bool    ascending = (orderType == TSDB_ORDER_ASC);
  const int32_t step = ascending ? 1 : -1;
  const int32_t first = ascending ? (*index) : numOfRows - (*index) - 1;

  int32_t numOfResult = 0;

  for (int32_t i = first; (i >= 0) && (i < numOfRows); i += step) {
    SResultRow *pRow = pRows[i];

    // empty result row: count it as handled and move on
    if (pRow->numOfRows == 0) {
      (*index) += 1;
      pGroupResInfo->rowId = 0;
      continue;
    }

    int32_t oldOffset = pGroupResInfo->rowId;
    int32_t numOfRowsToCopy = pRow->numOfRows - pGroupResInfo->rowId;

    /*
     * current output space is not enough to accommodate all data of this page, only partial results
     * will be copied to SQuery object's result buffer
     */
    if (numOfRowsToCopy > pQuery->rec.capacity - numOfResult) {
      numOfRowsToCopy = (int32_t) pQuery->rec.capacity - numOfResult;
      pGroupResInfo->rowId += numOfRowsToCopy;
    } else {
      // this result row is consumed completely
      pGroupResInfo->rowId = 0;
      (*index) += 1;
    }

    tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pRow->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t size = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pRow, pPage);
      char *dst = pQuery->sdata[col]->data + numOfResult * size;
      memcpy(dst, src + oldOffset * size, size * numOfRowsToCopy);
    }

    numOfResult += numOfRowsToCopy;
    if (numOfResult == pQuery->rec.capacity) {
      break;  // output buffer is full
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, numOfResult);
#endif
  return numOfResult;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
H
Haojun Liao 已提交
4127
void copyFromWindowResToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo) {
4128
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4129

4130
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4131
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo->pResult, pResultInfo->size, &pQInfo->groupIndex, orderType);
4132

4133 4134
  pQuery->rec.rows += numOfResult;
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4135 4136
}

H
Haojun Liao 已提交
4137
/**
 * For non-interval queries, raise the number of rows of every result row in the runtime windowResInfo to the largest
 * numOfRes found among its result cells. Cells of the TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ functions are
 * ignored. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t row = 0; row < pRuntimeEnv->windowResInfo.size; ++row) {
    SResultRow *pRow = pRuntimeEnv->windowResInfo.pResult[row];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool counted = !(fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
      if (counted) {
        SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pRow, col);
        pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pCell->numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4160
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4161
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4162
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4163
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4164

4165
  SResultRowInfo * pResultRowInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4166
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4167

H
Haojun Liao 已提交
4168
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->groupbyColumn) {
4169
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, pDataBlock);
4170
  } else {
4171
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, searchFn, pDataBlock);
4172
  }
H
Haojun Liao 已提交
4173 4174

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4175
    updateResultRowIndex(pResultRowInfo, pTableQueryInfo, QUERY_IS_ASC_QUERY(pQuery), pRuntimeEnv->timeWindowInterpo);
H
Haojun Liao 已提交
4176
  }
4177 4178
}

4179
/**
 * Check whether there are results that have not been returned to the client yet.
 *
 * @return false once the limit of the query is reached; otherwise, for fill queries (that are not point interpolation
 *         queries), true if the fill info still has results or - when the query is completed - filling up to the end
 *         of the query window produces rows; for all other queries, true if the query is completed, it is a group by
 *         column or interval query and the runtime windowResInfo is not empty.
 */
bool hasNotReturnedResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  if (pQuery->fillType == TSDB_FILL_NONE || isPointInterpoQuery(pQuery)) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // There are results not returned to client yet, so filling applied to the remain result is required firstly.
  if (taosFillHasMoreResults(pFillInfo)) {
    return true;
  }

  /*
   * While the code reaches here, there are no results remains now.
   * If query is not completed yet, the gaps between two results blocks need to be handled after next data block
   * is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
   * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
   * first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getNumOfResultsAfterFillGap(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

H
Haojun Liao 已提交
4219 4220 4221 4222
/**
 * Number of columns in the final result: the number of second-stage expressions if the query has them (pExpr2),
 * the number of output columns otherwise.
 */
static int16_t getNumOfFinalResCol(SQuery* pQuery) {
  if (pQuery->pExpr2 != NULL) {
    return pQuery->numOfExpr2;
  }

  return pQuery->numOfOutput;
}

4223
/**
 * Serialize the query result into the message buffer and update the query status.
 *
 * Layout written to data: the column data (numOfRows rows per column, columns taken from pExpr2 if present and from
 * pExpr1 otherwise), the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer in network byte order, and
 * one STableIdInfo per entry with its fields converted by htobe64/htonl.
 *
 * When the query is completed, the status is set to QUERY_OVER if the super table query is over, or - for other
 * queries - if no results remain to be returned.
 *
 * NOTE(review): the integer and STableIdInfo fields are stored through casted pointers into data, which presumably
 * relies on the target tolerating unaligned stores - confirm.
 *
 * @param pQInfo     query info object
 * @param numOfRows  number of result rows to copy per column
 * @param data       destination buffer, must be large enough for everything described above
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (pQuery->pExpr2 != NULL) {
    for (int32_t col = 0; col < pQuery->numOfExpr2; ++col) {
      int32_t bytes = pQuery->pExpr2[col].bytes;
      int32_t len = bytes * numOfRows;

      memmove(data, pQuery->sdata[col]->data, len);
      data += len;
    }
  } else {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pQuery->pExpr1[col].bytes;
      int32_t len = bytes * numOfRows;

      memmove(data, pQuery->sdata[col]->data, len);
      data += len;
    }
  }

  // number of table id entries, followed by the entries themselves
  int32_t numOfTables = (int32_t) taosHashGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)data = htonl(numOfTables);
  data += sizeof(int32_t);

  int32_t total = 0;
  for (STableIdInfo* item = taosHashIterate(pQInfo->arrTableIdInfo, NULL); item != NULL;
       item = taosHashIterate(pQInfo->arrTableIdInfo, item)) {
    STableIdInfo* pDst = (STableIdInfo*)data;
    pDst->uid = htobe64(item->uid);
    pDst->tid = htonl(item->tid);
    pDst->key = htobe64(item->key);

    data += sizeof(STableIdInfo);
    total++;

    qDebug("QInfo:%p set subscribe info, tid:%d, uid:%"PRIu64", skey:%"PRId64, pQInfo, item->tid, item->uid, item->key);
  }

  qDebug("QInfo:%p set %d subscribe info", pQInfo, total);

  // Check if query is completed or not for stable query or normal table query respectively.
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  if (pQInfo->runtimeEnv.stableQuery) {
    if (IS_STASBLE_QUERY_OVER(pQInfo)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else if (!hasNotReturnedResults(&pQInfo->runtimeEnv)) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

H
Haojun Liao 已提交
4278
/**
 * Generate filled results and apply the remaining offset of the query to them.
 *
 * Filled rows are produced into pQuery->sdata. While the offset is not consumed, rows are discarded: if a batch has
 * more rows than the offset, the surplus is moved to the front of pDst and returned; otherwise the whole batch is
 * dropped, the offset is reduced, and the next batch is generated as long as results remain.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         column buffers in which surplus rows are moved to the front
 * @param numOfFilled  not used by this function
 * @return number of rows available after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = (int32_t)taosFillResultDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      // part of this batch survives the offset: drop the leading rows and return the remainder
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= (int32_t)pQuery->limit.offset;
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pExpr1[col].bytes * pQuery->limit.offset,
                ret * pQuery->pExpr1[col].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // the whole batch is swallowed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;
    ret = 0;

    if (!hasNotReturnedResults(pRuntimeEnv)) {
      return ret;
    }
  }
}

4321
/**
 * Complete the cost summary of the query and write it to the debug log.
 *
 * The hash memory size, the result row pool size and the number of allocated result rows are collected here, and the
 * first stage merge time is added to the elapsed time.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pSummary = &pRuntimeEnv->summary;

  // memory held by the result row hash table plus the table query info map
  uint64_t resultRowHashMem = taosHashGetMemSize(pQInfo->runtimeEnv.pResultRowHashTable);
  uint64_t tableMapMem = taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pSummary->hashSize = resultRowHashMem + tableMapMem;

  // add the merge time
  pSummary->elapsedTime += pSummary->firstStageMergeTime;

  SResultRowPool* pPool = pQInfo->runtimeEnv.pool;
  pSummary->winInfoSize = getResultRowPoolMemSize(pPool);
  pSummary->numOfTimeWindows = getNumOfAllocatedResultRows(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->firstStageMergeTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pSummary->winInfoSize/1024.0,
      pSummary->numOfTimeWindows, pSummary->tableInfoSize/1024.0, pSummary->hashSize/1024.0);
}

4345 4346
/**
 * Consume the remaining offset inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped and lastKey moves one step past
 * the block. Otherwise the start position inside the block is computed from the offset, lastKey is set to the
 * timestamp at that position and the query functions are applied on the block. The offset is 0 afterwards.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   information of the data block the offset ends in
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  bool    asc = QUERY_IS_ASC_QUERY(pQuery);

  // the offset covers the block exactly: the block is ignored completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    pCurrent->lastKey = (asc ? pBlockInfo->window.ekey : pBlockInfo->window.skey) + step;
    pQuery->limit.offset = 0;
    return;
  }

  if (asc) {
    pQuery->pos = (int32_t)pQuery->limit.offset;
  } else {
    pQuery->pos = pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // the first column of the retrieved block is read as the timestamp column
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // the offset is consumed: continue from the timestamp at the start position
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4380

4381 4382 4383 4384 4385
/*
 * Skip pQuery->limit.offset rows block by block.
 *
 * Nothing is done when there is no positive offset or when the query has filter
 * columns. Blocks holding fewer rows than the remaining offset are skipped as a
 * whole (only the block info is read); the first block that can absorb the rest of
 * the offset is handed to updateOffsetVal() and iteration stops.
 *
 * Leaves through longjmp() on pRuntimeEnv->env when the query has been killed or
 * when terrno is set after the iteration.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0) {
    return;
  }

  if (pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t          factor    = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pCurrent  = pQuery->current;
  TsdbQueryHandleT handle    = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the remaining offset ends inside (or exactly at the end of) this block
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is skipped: move the last key one step past its boundary
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY edge = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey = edge + factor;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4419

H
Haojun Liao 已提交
4420 4421
/*
 * Finish skipping time windows once pQuery->limit.offset has reached zero.
 *
 * The window following *win is computed first. If that window does not begin inside
 * the given data block, only the bookkeeping keys (query start key, previous window
 * start key, table last key) are moved to its start and that key is returned.
 * Otherwise the data block is loaded, the next qualified window is located in it, the
 * query/table start keys are reset to the timestamp at that position and the query
 * functions are applied on the block; the window index is restored afterwards.
 *
 * @param pRuntimeEnv      runtime environment of the query
 * @param win              the last skipped time window
 * @param pBlockInfo       info of the current data block
 * @param pTableQueryInfo  per-table query state to update
 * @return the timestamp from which the query continues
 */
static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  assert(pQuery->limit.offset == 0);
  STimeWindow tw = *win;
  getNextTimeWindow(pQuery, &tw);

  bool nextWindowInBlock = (tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
                           (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery));

  // the next window starts in a later block: only move the keys forward
  if (!nextWindowInBlock) {
    pQuery->window.skey      = tw.skey;
    pWindowResInfo->prevSKey = tw.skey;
    pTableQueryInfo->lastKey = tw.skey;

    return tw.skey;
  }

  // load the data block and check data remaining in current data block
  // TODO optimize performance
  SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

  tw = *win;
  int32_t startPos =
      getNextQualifiedWindow(pRuntimeEnv, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
  assert(startPos >= 0);

  // set the abort info
  pQuery->pos = startPos;

  // reset the query start timestamp
  pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
  pQuery->window.skey = pTableQueryInfo->win.skey;
  TSKEY key = pTableQueryInfo->win.skey;

  pWindowResInfo->prevSKey = tw.skey;
  int32_t index = pRuntimeEnv->windowResInfo.curIndex;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
  pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
         GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
         pQuery->current->lastKey);

  return key;
}

H
Haojun Liao 已提交
4471
/*
 * Skip pQuery->limit.offset time windows of an interval query.
 *
 * Returns immediately when there is no positive offset, or when the query has filter
 * columns, a ts buffer (pTsBuf) or fill info (pFillInfo). Otherwise data blocks are
 * iterated and the offset is decreased once per time window that is counted as passed;
 * when the offset reaches zero, doSkipIntervalProcess() is invoked and its result is
 * stored in *start.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        in: asserted to be not beyond the last key of the current table in
 *                     scan direction; out: set to the key returned by
 *                     doSkipIntervalProcess() when the offset is consumed
 * @return always true in the visible code; leaves through longjmp() on pRuntimeEnv->env
 *         when terrno is set after the block iteration
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
4472
  SQuery *pQuery = pRuntimeEnv->pQuery;
4473 4474 4475 4476 4477
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(*start <= pQuery->current->lastKey);
  } else {
    assert(*start >= pQuery->current->lastKey);
  }
4478

4479
  // if queried with value filter (or with a ts buffer / fill info), do NOT forward the query start position
H
Haojun Liao 已提交
4480
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
4481
    return true;
4482
  }
4483

4484
  /*
4485 4486
   * 1. for an interval query without interpolation, we forward pQuery->interval.interval at a time for
   *    pQuery->limit.offset times. Since holes may exist, pQuery->interval.interval*pQuery->limit.offset is
4487 4488
   *    not a valid shortcut. Otherwise, we only forward pQuery->limit.offset number of points.
   */
4489
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);
4490

H
Haojun Liao 已提交
4491
  STimeWindow w = TSWINDOW_INITIALIZER;
4492
  bool ascQuery = QUERY_IS_ASC_QUERY(pQuery);
4493

H
Haojun Liao 已提交
4494
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
4495
  STableQueryInfo *pTableQueryInfo = pQuery->current;
4496

H
Haojun Liao 已提交
4497
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
H
Haojun Liao 已提交
4498
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
H
Haojun Liao 已提交
4499
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);
4500

    // Align the first window on this block. In ascending order this happens only while prevSKey is
    // still TSKEY_INITIAL_VAL; in descending order it is recomputed for every block.
H
Haojun Liao 已提交
4501 4502
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
H
Haojun Liao 已提交
4503
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
H
Haojun Liao 已提交
4504 4505
        pWindowResInfo->prevSKey = w.skey;
      }
4506
    } else {
H
Haojun Liao 已提交
4507
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);
4508 4509
      pWindowResInfo->prevSKey = w.skey;
    }
4510

4511 4512
    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);
4513

4514
    while (pQuery->limit.offset > 0) {
H
Haojun Liao 已提交
4515 4516
      STimeWindow tw = win;

      // the block-boundary test on win.ekey passes: count the window as skipped
4517
      if ((win.ekey <= blockInfo.window.ekey && ascQuery) || (win.ekey >= blockInfo.window.skey && !ascQuery)) {
4518 4519
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
4520 4521 4522 4523 4524 4525

        // current time window is aligned with blockInfo.window.ekey
        // restart it from next data block by set prevSKey to be TSKEY_INITIAL_VAL;
        if ((win.ekey == blockInfo.window.ekey && ascQuery) || (win.ekey == blockInfo.window.skey && !ascQuery)) {
          pWindowResInfo->prevSKey = TSKEY_INITIAL_VAL;
        }
H
Haojun Liao 已提交
4526
      }
4527

      // all requested windows are skipped: set up the actual start position and leave
4528
      if (pQuery->limit.offset == 0) {
H
Haojun Liao 已提交
4529 4530
        *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
        return true;
4531 4532
      }

4533 4534 4535
      // current window does not ended in current data block, try next data block
      getNextTimeWindow(pQuery, &tw);

H
Haojun Liao 已提交
4536 4537 4538 4539 4540 4541 4542
      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
4543 4544 4545
      if ((tw.skey <= blockInfo.window.ekey && ascQuery) || (tw.ekey >= blockInfo.window.skey && !ascQuery)) {

        SArray *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
4546 4547
        SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

        // the current window reaches beyond the block boundary: it is counted here, since the
        // boundary test at the top of the loop did not count it
4548
        if ((win.ekey > blockInfo.window.ekey && ascQuery) || (win.ekey < blockInfo.window.skey && !ascQuery)) {
H
Haojun Liao 已提交
4549 4550 4551 4552
          pQuery->limit.offset -= 1;
        }

        if (pQuery->limit.offset == 0) {
H
Haojun Liao 已提交
4553 4554
          *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
          return true;
H
Haojun Liao 已提交
4555 4556 4557 4558 4559
        } else {
          // more windows to skip: advance to the next qualified window inside this block
          tw = win;
          int32_t startPos =
              getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
          assert(startPos >= 0);
4560

H
Haojun Liao 已提交
4561 4562 4563 4564 4565 4566
          // set the abort info
          pQuery->pos = startPos;
          pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
          pWindowResInfo->prevSKey = tw.skey;
          win = tw;
        }
4567
      } else {
H
Haojun Liao 已提交
4568
        break;  // offset is not 0, and next time window begins or ends in the next block.
4569 4570 4571
      }
    }
  }
4572

H
Haojun Liao 已提交
4573 4574 4575 4576 4577
  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

4578 4579 4580
  return true;
}

H
Haojun Liao 已提交
4581 4582
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4583
/*
 * Create the tsdb query handle for the query held by pQInfo and store it in
 * pQInfo->runtimeEnv.pQueryHandle.
 *
 * No handle is created (TSDB_CODE_SUCCESS is returned right away) for queries that
 * only read tags, and for super table queries that are neither interval queries nor
 * fixed-output queries.
 *
 * @param tsdb           tsdb instance passed through to the tsdbQuery* functions
 * @param pQInfo         query info object
 * @param isSTableQuery  true for a super table query
 * @return the value of terrno after the handle has been requested
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // in this one-table ascending case the time window kept for that table replaces the query window
  bool useTableWindow = (!isSTableQuery)
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv));

  if (useTableWindow) {
    SArray* pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pOnlyTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pOnlyTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (!isFirstLastRowQuery(pQuery)) {
    if (isPointInterpoQuery(pQuery)) {
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    } else {
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    }

    return terrno;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // propagate the updated window to every table of every group
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pGroup);

    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pInfo = taosArrayGetP(pGroup, k);

      pInfo->win = pQuery->window;
      pInfo->lastKey = pInfo->win.skey;
    }
  }

  return terrno;
}

H
Haojun Liao 已提交
4640
/*
 * Build the array of fill column descriptors, one entry per final result column of
 * the query.
 *
 * The entries are taken from pQuery->pExpr2 when it is set and from pQuery->pExpr1
 * otherwise. The offset of each column is the sum of the byte widths of the columns
 * before it.
 *
 * @return a calloc'ed array with getNumOfFinalResCol(pQuery) entries, or NULL when the
 *         allocation fails
 */
static SFillColInfo* createFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = getNumOfFinalResCol(pQuery);

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  // TODO refactor
  SExprInfo* pExprList = (pQuery->pExpr2 != NULL)? pQuery->pExpr2 : pQuery->pExpr1;
  int32_t    rowOffset = 0;

  for (int32_t k = 0; k < numOfCols; ++k) {
    SExprInfo*    pExpr = &pExprList[k];
    SFillColInfo* pCol  = &pFillCol[k];

    pCol->col.bytes  = pExpr->bytes;
    pCol->col.type   = (int8_t)pExpr->type;
    pCol->col.offset = rowOffset;
    pCol->tagIndex   = -2;
    pCol->flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pCol->functionId = pExpr->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[k];

    rowOffset += pExpr->bytes;
  }

  return pFillCol;
}

4667
/*
 * Initialize the runtime environment of a query before execution.
 *
 * Steps, in order: derive the query property flags, set up the tsdb query handle,
 * record tsdb/vgId and the ts buffer, create the disk based result buffer and the
 * result row info (depending on the kind of query), set up the query runtime
 * environment and, for fill queries other than point interpolation, create the fill
 * info. Finally the query status is set to QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query info object to initialize
 * @param pTsBuf         ts buffer, may be NULL
 * @param tsdb           tsdb instance
 * @param vgId           vnode group id, stored in pQInfo
 * @param isSTableQuery  true for a super table query
 * @return TSDB_CODE_SUCCESS, the error code of the failing step, or
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the fill column info cannot be allocated
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);
  pRuntimeEnv->timeWindowInterpo = timeWindowInterpoRequired(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTsBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyColumn = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTsBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTsBuf, order);
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TENMB = 1024*1024*10;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TENMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      if (pRuntimeEnv->groupbyColumn) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      code = initResultRowInfo(&pRuntimeEnv->windowResInfo, 8, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery) || (!isSTableQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TENMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyColumn) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initResultRowInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = createFillColInfo(pQuery);
    if (pColInfo == NULL) {
      // createFillColInfo() returns NULL when its allocation fails; do not hand NULL to the fill module
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    STimeWindow w = TSWINDOW_INITIALIZER;

    // the query window may be given in either direction; align on its ordered bounds
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    int32_t numOfCols = getNumOfFinalResCol(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, numOfCols,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo, pQInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4767
/*
 * Clear the "complete" flag in the result cell info of every output column, for
 * each function context of the runtime environment that has one.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    if (pCell == NULL) {
      continue;
    }

    pCell->complete = false;
  }
}

H
Haojun Liao 已提交
4778
// TODO refactor: setAdditionalInfo
H
Haojun Liao 已提交
4779 4780 4781 4782 4783
/*
 * Prepare the execution environment for one data block of one table.
 *
 * Non-interval query: the execution context is set from the group index of the table
 * and the block end key moved by one step in scan direction.
 * Interval query: the interval query range is set from the block start key, and the
 * additional info is set when tag results are produced or a ts buffer is in use.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {  // non-interval query
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + factor);
    return;
  }

  TSKEY blockStart = pBlockInfo->window.skey;
  setIntervalQueryRange(pQInfo, blockStart);

  bool needAdditionalInfo = pRuntimeEnv->hasTagResults || (pRuntimeEnv->pTsBuf != NULL);
  if (needAdditionalInfo) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
}

H
Haojun Liao 已提交
4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809
/*
 * Sanity check (assert only) of the time window kept for a table against the query
 * window: the table window must be ordered in scan direction, the last key must not
 * lie before the window start in scan direction, and the table window must be
 * contained in the query window.
 */
static void doTableQueryInfoTimeWindowCheck(SQuery* pQuery, STableQueryInfo* pTableQueryInfo) {
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    // descending scan: start key is the larger one
    assert(
        (pTableQueryInfo->win.skey >= pTableQueryInfo->win.ekey) &&
        (pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey) &&
        (pTableQueryInfo->win.skey <= pQuery->window.skey && pTableQueryInfo->win.ekey >= pQuery->window.ekey));
    return;
  }

  // ascending scan: start key is the smaller one
  assert(
      (pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey) &&
      (pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey) &&
      (pTableQueryInfo->win.skey >= pQuery->window.skey && pTableQueryInfo->win.ekey <= pQuery->window.ekey));
}

H
Haojun Liao 已提交
4810
/*
 * Scan all data blocks delivered by the query handle of the current scan (the
 * primary handle during the master scan, the secondary handle otherwise) and apply
 * the super table query functions on each of them.
 *
 * For every block the owning table is looked up by tid; iteration stops when the
 * table is unknown or when loading the block fails. Blocks reported as
 * BLK_DATA_DISCARD only move the last key of the table past the block.
 *
 * Leaves through longjmp() on pRuntimeEnv->env when the query has been killed or
 * when terrno is set after the iteration.
 *
 * @return elapsed time, computed as the difference of two taosGetTimestampMs() values
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;
  if (!IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pSecQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t        factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    STableQueryInfo **ppTableInfo =
        (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    pQuery->current = *ppTableInfo;
    doTableQueryInfoTimeWindowCheck(pQuery, *ppTableInfo);

    if (!pRuntimeEnv->groupbyColumn) {
      setEnvForEachBlock(pQInfo, *ppTableInfo, &blockInfo);
    }

    uint32_t     blockStatus = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t loadCode = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, handle, &blockInfo, &pStatis, &pDataBlock, &blockStatus);
    if (loadCode != TSDB_CODE_SUCCESS) {
      break;
    }

    if (blockStatus == BLK_DATA_DISCARD) {
      // nothing to compute on this block: just step over it
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + factor;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4875 4876
/*
 * Prepare the runtime environment for querying a single table of the first table
 * group: the table at position `index`.
 *
 * The current query handle (if any) is released and a new one is created that covers
 * only this table, from its last key to the end key of its window. When a ts buffer
 * is in use, its cursor is positioned for the tag value held in the first function
 * context: either it seeks to the start position of that tag, or, when the element
 * under the cursor already carries the same tag, it continues from the saved cursor.
 *
 * Leaves through longjmp() on pRuntimeEnv->env when the query handle cannot be created.
 *
 * @return false when the ts buffer holds no data for the tag value, true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray *         pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pTableInfo = taosArrayGetP(pFirstGroup, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
    setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId* id = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list that includes only the current table
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableList = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo keyInfo = {.pTable = pTableInfo->pTable, .lastKey = pTableInfo->lastKey};
  taosArrayPush(tableList, &keyInfo);
  taosArrayPush(groupList, &tableList);

  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = groupList};

  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable, pQInfo, &pQInfo->memRef);
  taosArrayDestroy(tableList);
  taosArrayDestroy(groupList);

  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTsBuf == NULL) {
    initCtxOutputBuf(pRuntimeEnv);
    return true;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  // Seek to the start of the tag unless a cursor exists and the element under it already has this tag.
  bool seekToTagStart = true;
  if (pRuntimeEnv->cur.vgroupIndex != -1) {
    STSElem current = tsBufGetElem(pRuntimeEnv->pTsBuf);
    seekToTagStart = (tVariantCompare(current.tag, pTag) != 0);
  }

  if (seekToTagStart) {
    STSElem first = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, pTag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&first)) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64);
      }

      return false;
    }

    STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
    if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
      qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
             cur.blockIndex, cur.tsIndex);
    } else {
      qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64,
             cur.blockIndex, cur.tsIndex);
    }
  } else {
    // same tag as before: resume from the cursor saved in the runtime environment
    tsBufSetCursor(pRuntimeEnv->pTsBuf, &pRuntimeEnv->cur);

    STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
    if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
      qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
    } else {
      qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64, cur.blockIndex, cur.tsIndex);
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

H
Haojun Liao 已提交
4985
/*
 * Build a tsdb query condition from the query: column list, order and number of
 * columns are taken from pQuery, the time window is copied from *win. All other
 * members are zero.
 */
STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win) {
  STsdbQueryCond cond = {0};

  cond.order     = pQuery->order.order;
  cond.numOfCols = pQuery->numOfCols;
  cond.colList   = pQuery->colList;

  TIME_WINDOW_COPY(cond.twindow, *win);
  return cond;
}

/*
 * Build the id info (uid, tid, last key) of the table the query is currently
 * working on.
 *
 * The result is zero-initialized before the members are assigned, so that no member
 * is left indeterminate when the struct is later copied as a whole.
 *
 * @param pQuery  query object; pQuery and pQuery->current must not be NULL (asserted)
 * @return the id info of pQuery->current
 */
static STableIdInfo createTableIdInfo(SQuery* pQuery) {
  assert(pQuery != NULL && pQuery->current != NULL);

  STableIdInfo tidInfo = {0};
  STableId* id = TSDB_TABLEID(pQuery->current->pTable);

  tidInfo.uid = id->uid;
  tidInfo.tid = id->tid;
  tidInfo.key = pQuery->current->lastKey;

  return tidInfo;
}

/*
 * Record the last key of the current table in the hash table keyed by tid: a new
 * entry is inserted when the table is not present yet, otherwise only the key of the
 * existing entry is refreshed.
 */
static void updateTableIdInfo(SQuery* pQuery, SHashObj* pTableIdInfo) {
  STableIdInfo tidInfo = createTableIdInfo(pQuery);

  STableIdInfo* idinfo = taosHashGet(pTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid));
  if (idinfo == NULL) {
    taosHashPut(pTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid), &tidInfo, sizeof(STableIdInfo));
    return;
  }

  // an existing entry must describe the very same table
  assert(idinfo->tid == tidInfo.tid && idinfo->uid == tidInfo.uid);
  idinfo->key = tidInfo.key;
}

5020 5021 5022 5023 5024 5025 5026
/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
5027
static void sequentialTableProcess(SQInfo *pQInfo) {
5028
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5029
  SQuery *          pQuery = pRuntimeEnv->pQuery;
5030
  setQueryStatus(pQuery, QUERY_COMPLETED);
5031

H
Haojun Liao 已提交
5032
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
5033

5034
  if (isPointInterpoQuery(pQuery)) {
H
Haojun Liao 已提交
5035
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
5036
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
5037

5038
    while (pQInfo->groupIndex < numOfGroups) {
5039
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
H
Haojun Liao 已提交
5040

5041 5042
      qDebug("QInfo:%p point interpolation query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo,
             pQInfo->groupIndex, numOfGroups, group);
H
Haojun Liao 已提交
5043
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5044

H
Haojun Liao 已提交
5045
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
5046
      SArray *tx = taosArrayDup(group);
H
Haojun Liao 已提交
5047
      taosArrayPush(g1, &tx);
5048

H
Haojun Liao 已提交
5049 5050 5051 5052 5053 5054 5055
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
5056

H
Haojun Liao 已提交
5057
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
5058 5059 5060 5061 5062 5063

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
5064

H
Haojun Liao 已提交
5065
      initCtxOutputBuf(pRuntimeEnv);
5066

5067
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5068
      assert(taosArrayGetSize(s) >= 1);
5069

5070
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
5071
      taosArrayDestroy(s);
H
Haojun Liao 已提交
5072

H
Haojun Liao 已提交
5073
      // here we simply set the first table as current table
5074
      SArray *first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
5075 5076
      pQuery->current = taosArrayGetP(first, 0);

5077
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5078

H
Haojun Liao 已提交
5079 5080 5081 5082 5083
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
5084

H
Haojun Liao 已提交
5085 5086 5087 5088 5089
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5090 5091 5092 5093 5094 5095

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
5096
  } else if (pRuntimeEnv->groupbyColumn) {  // group-by on normal columns query
5097
    while (pQInfo->groupIndex < numOfGroups) {
5098
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
5099

5100 5101
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex,
             numOfGroups);
5102

H
Haojun Liao 已提交
5103
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5104

5105
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
5106
      SArray *tx = taosArrayDup(group);
5107 5108 5109 5110 5111 5112 5113 5114 5115 5116
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5117
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
5118
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
5119

B
Bomin Zhang 已提交
5120 5121
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5122 5123 5124
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5125

5126
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5127 5128
      assert(taosArrayGetSize(s) >= 1);

5129
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5130 5131 5132 5133 5134

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

5135
      taosArrayDestroy(s);
5136

5137
      // no results generated for current group, continue to try the next group
5138
      SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
5139 5140 5141 5142 5143
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5144
        pWindowResInfo->pResult[i]->closed = true;  // enable return all results for group by normal columns
5145

H
Haojun Liao 已提交
5146
        SResultRow *pResult = pWindowResInfo->pResult[i];
5147
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5148
          SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, j);
5149
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
5150 5151 5152
        }
      }

5153
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5154
             pQInfo->groupIndex);
5155 5156 5157 5158 5159 5160
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5161
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5162

5163
      pQInfo->groupIndex = currentGroupIndex;  // restore the group index
5164
      assert(pQuery->rec.rows == pWindowResInfo->size);
5165
      resetResultRowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5166
      break;
5167
    }
H
Haojun Liao 已提交
5168
  } else if (pRuntimeEnv->queryWindowIdentical && pRuntimeEnv->pTsBuf == NULL && !isTSCompQuery(pQuery)) {
5169 5170 5171 5172 5173 5174 5175 5176 5177 5178
    //super table projection query with identical query time range for all tables.
    SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    SArray *group = GET_TABLEGROUP(pQInfo, 0);
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));

    void *pQueryHandle = pRuntimeEnv->pQueryHandle;
    if (pQueryHandle == NULL) {
H
Haojun Liao 已提交
5179
      STsdbQueryCond con = createTsdbQueryCond(pQuery, &pQuery->window);
5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &con, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
      pQueryHandle = pRuntimeEnv->pQueryHandle;
    }

    // skip blocks without load the actual data block from file if no filter condition present
    //    skipBlocks(&pQInfo->runtimeEnv);
    //    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    //      setQueryStatus(pQuery, QUERY_COMPLETED);
    //      return;
    //    }

H
Haojun Liao 已提交
5191 5192 5193 5194 5195 5196
    if (pQuery->prjInfo.vgroupLimit != -1) {
      assert(pQuery->limit.limit == -1 && pQuery->limit.offset == 0);
    } else if (pQuery->limit.limit != -1) {
      assert(pQuery->prjInfo.vgroupLimit == -1);
    }

5197
    bool hasMoreBlock = true;
H
Haojun Liao 已提交
5198
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
5199 5200 5201 5202
    SQueryCostInfo *summary = &pRuntimeEnv->summary;
    while ((hasMoreBlock = tsdbNextDataBlock(pQueryHandle)) == true) {
      summary->totalBlocks += 1;

5203
      if (isQueryKilled(pQInfo)) {
5204 5205 5206 5207 5208
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
      }

      tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
      STableQueryInfo **pTableQueryInfo =
H
Haojun Liao 已提交
5209
          (STableQueryInfo **) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
5210 5211 5212 5213 5214
      if (pTableQueryInfo == NULL) {
        break;
      }

      pQuery->current = *pTableQueryInfo;
H
Haojun Liao 已提交
5215
      doTableQueryInfoTimeWindowCheck(pQuery, *pTableQueryInfo);
5216 5217 5218 5219 5220

      if (pRuntimeEnv->hasTagResults) {
        setTagVal(pRuntimeEnv, pQuery->current->pTable, pQInfo->tsdb);
      }

H
Haojun Liao 已提交
5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->current->windowResInfo.size > pQuery->prjInfo.vgroupLimit) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }

      // it is a super table ordered projection query, check for the number of output for each vgroup
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->rec.rows >= pQuery->prjInfo.vgroupLimit) {
        if (QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.skey >= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        } else if (!QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.ekey <= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        }
      }

5240 5241
      uint32_t     status = 0;
      SDataStatis *pStatis = NULL;
5242
      SArray      *pDataBlock = NULL;
5243 5244 5245 5246 5247 5248 5249

      int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                          &pStatis, &pDataBlock, &status);
      if (ret != TSDB_CODE_SUCCESS) {
        break;
      }

H
Haojun Liao 已提交
5250 5251 5252 5253 5254
      if(status == BLK_DATA_DISCARD) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }
5255

H
Haojun Liao 已提交
5256
      ensureOutputBuffer(pRuntimeEnv, &blockInfo);
H
Haojun Liao 已提交
5257 5258
      int64_t prev = getNumOfResult(pRuntimeEnv);

5259 5260 5261 5262 5263 5264 5265 5266 5267 5268
      pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : blockInfo.rows - 1;
      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

      summary->totalRows += blockInfo.rows;
      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes,
             pQuery->current->lastKey);

      pQuery->rec.rows = getNumOfResult(pRuntimeEnv);

H
Haojun Liao 已提交
5269
      int64_t inc = pQuery->rec.rows - prev;
H
Haojun Liao 已提交
5270
      pQuery->current->windowResInfo.size += (int32_t) inc;
H
Haojun Liao 已提交
5271

5272 5273 5274 5275 5276
      // the flag may be set by tableApplyFunctionsOnBlock, clear it here
      CLEAR_QUERY_STATUS(pQuery, QUERY_COMPLETED);

      updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);

H
Haojun Liao 已提交
5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292
      if (pQuery->prjInfo.vgroupLimit >= 0) {
        if (((pQuery->rec.rows + pQuery->rec.total) < pQuery->prjInfo.vgroupLimit) || ((pQuery->rec.rows + pQuery->rec.total) > pQuery->prjInfo.vgroupLimit && prev < pQuery->prjInfo.vgroupLimit)) {
          if (QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts < blockInfo.window.ekey) {
            pQuery->prjInfo.ts = blockInfo.window.ekey;
          } else if (!QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts > blockInfo.window.skey) {
            pQuery->prjInfo.ts = blockInfo.window.skey;
          }
        }
      } else {
        // the limitation of output result is reached, set the query completed
        skipResults(pRuntimeEnv);
        if (limitResults(pRuntimeEnv)) {
          setQueryStatus(pQuery, QUERY_COMPLETED);
          SET_STABLE_QUERY_OVER(pQInfo);
          break;
        }
5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304
      }

      // while the output buffer is full or limit/offset is applied, query may be paused here
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL|QUERY_COMPLETED)) {
        break;
      }
    }

    if (!hasMoreBlock) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      SET_STABLE_QUERY_OVER(pQInfo);
    }
5305 5306
  } else {
    /*
5307 5308 5309
     * the following two cases handled here.
     * 1. ts-comp query, and 2. the super table projection query with different query time range for each table.
     * If the subgroup index is larger than 0, results generated by group by tbname,k is existed.
5310 5311
     * we need to return it to client in the first place.
     */
5312
    if (pQInfo->groupIndex > 0) {
5313
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5314
      pQuery->rec.total += pQuery->rec.rows;
5315

5316
      if (pQuery->rec.rows > 0) {
5317 5318 5319
        return;
      }
    }
5320

5321
    // all data have returned already
5322
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5323 5324
      return;
    }
5325

H
Haojun Liao 已提交
5326
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
H
Haojun Liao 已提交
5327
    resetResultRowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5328

H
Haojun Liao 已提交
5329
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5330 5331
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5332

5333
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
5334
      if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5335
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5336
      }
5337

5338
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5339
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5340
        pQInfo->tableIndex++;
5341 5342
        continue;
      }
5343

H
hjxilinx 已提交
5344
      // TODO handle the limit offset problem
5345
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5346 5347
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5348 5349 5350
          continue;
        }
      }
5351

5352
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5353
      skipResults(pRuntimeEnv);
5354

5355
      // the limitation of output result is reached, set the query completed
5356
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5357
        SET_STABLE_QUERY_OVER(pQInfo);
5358 5359
        break;
      }
5360

5361 5362
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5363

5364
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5365 5366 5367 5368 5369 5370
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5371
        pQInfo->tableIndex++;
5372
        updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5373

5374
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5375
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5376 5377
          break;
        }
5378

H
Haojun Liao 已提交
5379 5380
        if (pRuntimeEnv->pTsBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTsBuf->cur;
H
Haojun Liao 已提交
5381 5382
        }

5383
      } else {
5384
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5385 5386
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5387 5388
          continue;
        } else {
5389 5390 5391
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5392 5393 5394
        }
      }
    }
H
Haojun Liao 已提交
5395

5396
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5397 5398
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5399

5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413
    /*
     * 1. super table projection query, group-by on normal columns query, ts-comp query
     * 2. point interpolation query, last row query
     *
     * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
     * since the finalize stage will be done at the client side.
     *
     * projection query, point interpolation query do not need the finalizer.
     *
     * Only the ts-comp query requires the finalizer function to be executed here.
     */
    if (isTSCompQuery(pQuery)) {
      finalizeQueryResult(pRuntimeEnv);
    }
5414

H
Haojun Liao 已提交
5415 5416
    if (pRuntimeEnv->pTsBuf != NULL) {
      pRuntimeEnv->cur = pRuntimeEnv->pTsBuf->cur;
5417
    }
5418

5419 5420 5421 5422 5423
    qDebug("QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64
           " points returned, total:%" PRId64 ", offset:%" PRId64,
           pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows,
           pQuery->rec.total, pQuery->limit.offset);
  }
5424 5425
}

H
Haojun Liao 已提交
5426
/**
 * Switch the runtime environment to the reverse scan and open the secondary
 * query handle used for it.
 *
 * The scan flag is set to reverse, the query window boundaries are swapped and
 * the scan order (including that of the ts buffer cursor, if any) is switched;
 * doRestoreContext() undoes these changes.
 *
 * @param pQInfo
 * @return 0 if the secondary query handle was created, -1 otherwise
 */
static int32_t doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order);
  }

  // the condition is built from the already swapped window and switched order
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // clean unused handle; reset the pointer so that it never refers to a released
  // handle while the helpers below run
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  return (pRuntimeEnv->pSecQueryHandle == NULL)? -1:0;
}

5455 5456 5457 5458
/**
 * Undo the window/order changes made for the reverse scan: swap the query
 * window boundaries back, switch the scan order (and the order of the ts
 * buffer cursor, if any) back, and mark the runtime as master scan again.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  // query window and scan direction
  SWAP(pQ->window.skey, pQ->window.ekey, TSKEY);
  SWITCH_ORDER(pQ->order.order);

  // the ts buffer cursor follows the scan direction
  if (pEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pEnv->pTsBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5470 5471 5472
/**
 * Close every result row once a scan is finished.
 *
 * For an interval query the result rows of each table in each table group are
 * closed; otherwise the result rows of the runtime environment are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllResultRows(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t groupIdx = 0; groupIdx < numOfGroup; ++groupIdx) {
    SArray *tables = GET_TABLEGROUP(pQInfo, groupIdx);
    size_t  numOfTables = taosArrayGetSize(tables);

    for (int32_t tableIdx = 0; tableIdx < numOfTables; ++tableIdx) {
      STableQueryInfo *pTable = taosArrayGetP(tables, tableIdx);
      closeAllResultRows(&pTable->windowResInfo);
    }
  }
}

/**
 * Execute a query over multiple tables: master scan, optional reverse scan,
 * then copy the results to the output buffer.
 *
 * When pQInfo->groupIndex is larger than 0 the scan has been done by a previous
 * invocation and only the remaining results are copied out.
 *
 * An error code in pQInfo->code or a killed query aborts the execution with
 * longjmp() on pRuntimeEnv->env.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  /*
   * if the groupIndex > 0, the query process must be completed yet, we only need to
   * copy the data into output buffer
   */
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  // optional reverse scan
  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    int32_t code = doSaveContext(pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      pQInfo->code = code;
    } else {
      elapsed = scanMultiTableDataBlocks(pQInfo);
      qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);
      doRestoreContext(pQInfo);
    }
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    // TODO finalizeQueryResult may cause SIGSEGV, since the memory may not be allocated yet; add a cleanup function instead
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery))) {
    // not an interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else {
    int32_t code = mergeGroupResult(pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      // set the error code
      pQInfo->code = code;
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

H
Haojun Liao 已提交
5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580
static char *getArithemicInputSrc(void *param, const char *name, int32_t colId) {
  SArithmeticSupport *pSupport = (SArithmeticSupport *) param;
  SExprInfo* pExprInfo = (SExprInfo*) pSupport->exprList;

  int32_t index = -1;
  for (int32_t i = 0; i < pSupport->numOfCols; ++i) {
    if (colId == pExprInfo[i].base.resColId) {
      index = i;
      break;
    }
  }

  assert(index >= 0 && index < pSupport->numOfCols);
  return pSupport->data[index] + pSupport->offset * pExprInfo[index].bytes;
}

/**
 * Evaluate the second-stage expressions (pQuery->pExpr2) on top of the results
 * of the first stage (pQuery->pExpr1, stored in pQuery->sdata) and write the
 * outcome back to pQuery->sdata. Nothing is done if there is no second-stage
 * expression.
 *
 * The results are produced in temporary pages first, since the first-stage
 * columns in pQuery->sdata are still needed as input while evaluating.
 *
 * NOTE(review): allocation results are not checked; the function has no way to
 * report a failure to its callers.
 */
static void doSecondaryArithmeticProcess(SQuery* pQuery) {
  if (pQuery->numOfExpr2 == 0) {
    return;
  }

  SArithmeticSupport arithSup = {0};
  tFilePage **data = calloc(pQuery->numOfExpr2, POINTER_BYTES);
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    int32_t bytes = pQuery->pExpr2[i].bytes;

    // zero-filled: a column for which no source is found below must not expose uninitialized memory
    data[i] = (tFilePage *)calloc(1, (size_t)(bytes * pQuery->rec.rows) + sizeof(tFilePage));
  }

  // the first-stage output columns are the input of the arithmetic expressions
  arithSup.offset = 0;
  arithSup.numOfCols = (int32_t)pQuery->numOfOutput;
  arithSup.exprList  = pQuery->pExpr1;
  arithSup.data      = calloc(arithSup.numOfCols, POINTER_BYTES);

  for (int32_t k = 0; k < arithSup.numOfCols; ++k) {
    arithSup.data[k] = pQuery->sdata[k]->data;
  }

  for (int i = 0; i < pQuery->numOfExpr2; ++i) {
    SExprInfo *pExpr = &pQuery->pExpr2[i];

    SSqlFuncMsg* pSqlFunc = &pExpr->base;
    if (pSqlFunc->functionId != TSDB_FUNC_ARITHM) {
      // plain column: copy the first-stage column with the same function and column id
      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pSqlFunc->functionId == pQuery->pExpr1[j].base.functionId &&
            pSqlFunc->colInfo.colId == pQuery->pExpr1[j].base.colInfo.colId) {
          memcpy(data[i]->data, pQuery->sdata[j]->data, (size_t)(pQuery->pExpr1[j].bytes * pQuery->rec.rows));
          break;
        }
      }
    } else {
      // calculate the result from several other columns
      arithSup.pArithExpr = pExpr;
      arithmeticTreeTraverse(arithSup.pArithExpr->pExpr, (int32_t)pQuery->rec.rows, data[i]->data, &arithSup, TSDB_ORDER_ASC,
                            getArithemicInputSrc);
    }
  }

  // all columns are evaluated: move the results to the output buffers
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    memcpy(pQuery->sdata[i]->data, data[i]->data, (size_t)(pQuery->pExpr2[i].bytes * pQuery->rec.rows));
  }

  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    tfree(data[i]);
  }

  tfree(data);
  tfree(arithSup.data);
}

5634 5635 5636 5637 5638 5639
/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
Haojun Liao 已提交
5640
static void tableAggregationProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5641
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5642

H
hjxilinx 已提交
5643
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5644
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5645 5646
    return;
  }
5647

5648
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5649
  finalizeQueryResult(pRuntimeEnv);
5650

H
Haojun Liao 已提交
5651 5652 5653 5654
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
  doSecondaryArithmeticProcess(pQuery);

5655
  if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5656
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5657
  }
5658

5659
  skipResults(pRuntimeEnv);
5660
  limitResults(pRuntimeEnv);
5661 5662
}

H
Haojun Liao 已提交
5663
/**
 * Projection query on a single table.
 *
 * The current table (pQuery->current) is scanned repeatedly until at least one
 * row survives the offset handling or the query is completed; the limit is then
 * applied. When the query is completed, the last key of the table is recorded
 * in pQInfo->arrTableIdInfo.
 *
 * @param pQInfo
 * @param pTableInfo  not used, the scan works on pQuery->current
 */
static void tableProjectionProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  // for ts_comp query, re-initialized is not allowed
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (!isTSCompQuery(pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // go through the shared helper, which refreshes the key of an entry that already exists for this tid
    updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

5716
// handle time interval query on table
H
hjxilinx 已提交
5717
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5718
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);
H
hjxilinx 已提交
5719
  SQuery *pQuery = pRuntimeEnv->pQuery;
5720

5721
  TSKEY newStartKey = QUERY_IS_ASC_QUERY(pQuery)? INT64_MIN:INT64_MAX;
5722

5723
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5724
  if (!pRuntimeEnv->groupbyColumn) {
H
Haojun Liao 已提交
5725 5726 5727 5728 5729
    skipTimeInterval(pRuntimeEnv, &newStartKey);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      return;
    }
5730 5731
  }

5732 5733
  scanOneTableDataBlocks(pRuntimeEnv, newStartKey);
  assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
5734

5735
  finalizeQueryResult(pRuntimeEnv);
5736

5737 5738
  // skip offset result rows
  pQuery->rec.rows = 0;
5739

5740 5741
  // not fill or no result generated during this query
  if (pQuery->fillType == TSDB_FILL_NONE || pRuntimeEnv->windowResInfo.size == 0) {
5742 5743 5744 5745
    // all data scanned, the group by normal column can return
    int32_t numOfClosed = numOfClosedResultRows(&pRuntimeEnv->windowResInfo);
    if (pQuery->limit.offset > numOfClosed) {
      return;
H
Haojun Liao 已提交
5746 5747
    }

H
Haojun Liao 已提交
5748
    pQInfo->groupIndex = (int32_t) pQuery->limit.offset;
5749

5750 5751
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    doSecondaryArithmeticProcess(pQuery);
5752

5753 5754
    limitResults(pRuntimeEnv);
  } else {
5755

5756 5757
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    doSecondaryArithmeticProcess(pQuery);
5758

5759 5760
    taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
    taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (const tFilePage **)pQuery->sdata);
H
Haojun Liao 已提交
5761

5762 5763
    int32_t numOfFilled = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
H
Haojun Liao 已提交
5764

5765
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
H
Haojun Liao 已提交
5766 5767
      limitResults(pRuntimeEnv);
    }
5768 5769 5770
  }
}

5771 5772 5773 5774
/*
 * Execute one round of a query that involves exactly one table.
 *
 * When results of a previous round are still pending, they are delivered first and no new scan is
 * launched. Otherwise the query is dispatched to the interval, aggregation or projection
 * processor and the elapsed time is added to the query summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  if (hasNotReturnedResults(pEnv)) {
    if (pQuery->fillType == TSDB_FILL_NONE) {
      // remaining window results are copied out, starting at pQInfo->groupIndex
      pQuery->rec.rows = 0;
      assert(pEnv->windowResInfo.size > 0);

      if (pQInfo->groupIndex < pEnv->windowResInfo.size) {
        copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      }

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %" PRId64 " rows returned from group results, total:%" PRId64 "", pQInfo, pQuery->rec.rows,
               pQuery->rec.total);
      }

      // no data remains
      if (pQuery->rec.rows <= 0 || pEnv->windowResInfo.size <= pQInfo->groupIndex) {
        qDebug("QInfo:%p query over, %" PRId64 " rows are returned", pQInfo, pQuery->rec.total);
      }
    } else {
      /*
       * Results are left over because of result interpolation: keep producing them here instead
       * of launching the retrieve procedure for the next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
    }

    return;
  }

  // number of rows produced during this round
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);

  // the only table of the only group becomes the current table of the query
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTable = taosArrayGetP(pGroup, 0);
  pQuery->current = pTable;

  // group by normal column, sliding window query and interval query share the interval processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pEnv->groupbyColumn) {
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pEnv)) {
    tableAggregationProcess(pQInfo, pTable);
  } else {
    // diff/add/multiply/subtract/division
    assert(pQuery->checkResultBuf == 1);
    tableProjectionProcess(pQInfo, pTable);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5836
/*
 * Execute one round of a super-table query.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation queries nor
 * grouped by a normal column, go through multiTableQueryProcess; everything else is handled by
 * sequentialTableProcess. The elapsed time is added to the query summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  bool multiTable = QUERY_IS_INTERVAL_QUERY(pQuery) ||
                    (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && (!pEnv->groupbyColumn));

  if (!multiTable) {
    assert((pQuery->checkResultBuf == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
           pEnv->groupbyColumn);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5857
/*
 * Locate the source column referenced by an expression.
 *
 * @param pQueryMsg  query message providing colList/numOfCols and numOfTags
 * @param pExprMsg   expression whose colInfo (flag and colId) is looked up
 * @param pTagCols   tag column list, searched when the expression refers to a tag
 * @return  for tag columns: TSDB_TBNAME_COLUMN_INDEX for the table name pseudo column, otherwise
 *          the position in pTagCols; TSDB_UD_COLUMN_INDEX for user-defined columns; for any other
 *          column its position in pQueryMsg->colList; INT32_MIN when no column matches.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t idx = 0; idx < pQueryMsg->numOfTags; ++idx) {
      if (pTagCols[idx].colId == pExprMsg->colInfo.colId) {
        return idx;
      }
    }

    return INT32_MIN;
  }

  if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  }

  for (int32_t idx = 0; idx < pQueryMsg->numOfCols; ++idx) {
    if (pQueryMsg->colList[idx].colId == pExprMsg->colInfo.colId) {
      return idx;
    }
  }

  return INT32_MIN;  // return a less than TSDB_TBNAME_COLUMN_INDEX value
}

5888 5889
/*
 * Check that the column referenced by an expression can be resolved by getColumnIndexInSource.
 *
 * @return true unless the lookup yields INT32_MIN (no matching column)
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  return getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols) != INT32_MIN;
}

5893
/*
 * Sanity check of the scalar fields of a query message.
 *
 * The checks run in a fixed order and only the first violation is logged: negative interval,
 * non-positive number of tables, negative number of group-by columns, and a number of output
 * columns outside (0, TSDB_MAX_COLUMNS].
 *
 * @return true when every check passes, false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

5917
/*
 * Validate the source columns of a query message against its output expressions.
 *
 * @param pQueryMsg  query message (numOfCols, numOfTags, numOfOutput and colList are read)
 * @param pExprMsg   array of pQueryMsg->numOfOutput expression messages
 * @param pTagCols   tag column list used to resolve tag references
 * @return true when the column counts are sane and every checked expression refers to a
 *         resolvable column, false otherwise
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg, SColumnInfo* pTagCols) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;
  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  // without any source column, only tag projection and tbname based tid_tag/count are acceptable
  if (numOfTotal == 0) {
    for(int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      SSqlFuncMsg* pFuncMsg = pExprMsg[i];

      if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
          (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
          (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
        continue;
      }

      return false;
    }
  }

  for(int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    // createQueryFuncExprFromMsg builds arithmetic expressions from arg[0] and never resolves
    // their colInfo, hence they are not subject to the column lookup
    if (pExprMsg[i]->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    if (!validateExprColumnInfo(pQueryMsg, pExprMsg[i], pTagCols)) {
      // this function returns bool: an error code here would be non-zero and read as "valid"
      return false;
    }
  }

  return true;
}

5947
/*
 * Build the table id list from the STableIdInfo records located at pMsg.
 *
 * Each record is byte-swapped in place inside the message buffer and then appended to a newly
 * created array, which is returned through pTableIdList.
 *
 * @param pQueryMsg     query message; numOfTables (> 0, asserted) gives the number of records
 * @param pMsg          position of the first STableIdInfo record
 * @param pTableIdList  receives the new array
 * @return the position right behind the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t remaining = pQueryMsg->numOfTables; remaining > 0; --remaining, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5965

5966
/**
H
hjxilinx 已提交
5967
 * pQueryMsg->head has been converted before this function is called.
5968
 *
H
hjxilinx 已提交
5969
 * @param pQueryMsg
5970 5971 5972 5973
 * @param pTableIdList
 * @param pExpr
 * @return
 */
H
Haojun Liao 已提交
5974
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr, SSqlFuncMsg ***pSecStageExpr,
5975
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols, char** sql) {
5976 5977
  int32_t code = TSDB_CODE_SUCCESS;

5978 5979 5980 5981
  if (taosCheckVersion(pQueryMsg->version, version, 3) != 0) {
    return TSDB_CODE_QRY_INVALID_MSG;
  }

5982 5983 5984 5985
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
5986 5987 5988
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
5989 5990
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
Haojun Liao 已提交
5991
  pQueryMsg->vgroupLimit = htobe64(pQueryMsg->vgroupLimit);
H
hjxilinx 已提交
5992

5993 5994
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5995
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5996
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5997 5998

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5999
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
6000
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
6001 6002 6003
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
6004
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
6005
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
6006
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6007
  pQueryMsg->tbnameCondLen = htonl(pQueryMsg->tbnameCondLen);
H
Haojun Liao 已提交
6008
  pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput);
6009
  pQueryMsg->sqlstrLen = htonl(pQueryMsg->sqlstrLen);
6010

6011
  // query msg safety check
6012
  if (!validateQueryMsg(pQueryMsg)) {
6013 6014
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
6015 6016
  }

H
hjxilinx 已提交
6017 6018
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
6019 6020
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
6021
    pColInfo->colId = htons(pColInfo->colId);
6022
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
6023 6024
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
6025

6026 6027 6028 6029 6030
    if (!isValidDataType(pColInfo->type)) {
      qDebug("qmsg:%p, invalid data type in source column, index:%d, type:%d", pQueryMsg, col, pColInfo->type);
      code = TSDB_CODE_QRY_INVALID_MSG;
      goto _cleanup;
    }
6031

H
hjxilinx 已提交
6032
    int32_t numOfFilters = pColInfo->numOfFilters;
6033
    if (numOfFilters > 0) {
H
hjxilinx 已提交
6034
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
6035 6036 6037 6038
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
6039 6040 6041
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
6042
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
6043

6044 6045
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
6046 6047 6048

      pMsg += sizeof(SColumnFilterInfo);

6049 6050
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
6051

6052
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
6053 6054 6055 6056 6057
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

6058
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
6059
        pMsg += (pColFilter->len + 1);
6060
      } else {
6061 6062
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
6063 6064
      }

6065 6066
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
6067 6068 6069
    }
  }

6070
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
6071 6072 6073 6074 6075
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

6076
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
6077

6078
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6079
    (*pExpr)[i] = pExprMsg;
6080

6081
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
6082
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
H
Haojun Liao 已提交
6083 6084 6085 6086
    pExprMsg->colInfo.flag  = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId    = htons(pExprMsg->functionId);
    pExprMsg->numOfParams   = htons(pExprMsg->numOfParams);
    pExprMsg->resColId      = htons(pExprMsg->resColId);
6087

6088
    pMsg += sizeof(SSqlFuncMsg);
6089 6090

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
6091
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
6092 6093 6094 6095
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
6096
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
6097 6098 6099 6100 6101
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
6102 6103
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
6104
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
6105 6106
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
6107 6108 6109
      }
    }

6110
    pExprMsg = (SSqlFuncMsg *)pMsg;
6111
  }
6112

H
Haojun Liao 已提交
6113 6114 6115
  if (pQueryMsg->secondStageOutput) {
    pExprMsg = (SSqlFuncMsg *)pMsg;
    *pSecStageExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES);
6116

H
Haojun Liao 已提交
6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      (*pSecStageExpr)[i] = pExprMsg;

      pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
      pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
      pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
      pExprMsg->functionId = htons(pExprMsg->functionId);
      pExprMsg->numOfParams = htons(pExprMsg->numOfParams);

      pMsg += sizeof(SSqlFuncMsg);

      for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
        pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
        pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

        if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
          pExprMsg->arg[j].argValue.pz = pMsg;
          pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
        } else {
          pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
        }
      }

      int16_t functionId = pExprMsg->functionId;
      if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
        if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
          code = TSDB_CODE_QRY_INVALID_MSG;
          goto _cleanup;
        }
      }

      pExprMsg = (SSqlFuncMsg *)pMsg;
    }
  }

H
hjxilinx 已提交
6152
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
6153

H
hjxilinx 已提交
6154
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
6155
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
6156 6157 6158 6159
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
6160 6161

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
D
fix bug  
dapan1121 已提交
6162
      (*groupbyCols)[i].colId = htons(*(int16_t *)pMsg);
6163
      pMsg += sizeof((*groupbyCols)[i].colId);
6164

D
fix bug  
dapan1121 已提交
6165
      (*groupbyCols)[i].colIndex = htons(*(int16_t *)pMsg);
6166 6167
      pMsg += sizeof((*groupbyCols)[i].colIndex);

D
fix bug  
dapan1121 已提交
6168
      (*groupbyCols)[i].flag = htons(*(int16_t *)pMsg);
6169 6170 6171 6172 6173
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
6174

H
hjxilinx 已提交
6175 6176
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
6177 6178
  }

6179 6180
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
6181
    pQueryMsg->fillVal = (uint64_t)(pMsg);
6182 6183

    int64_t *v = (int64_t *)pMsg;
6184
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6185 6186
      v[i] = htobe64(v[i]);
    }
6187

6188
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
6189
  }
6190

6191 6192
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6193 6194 6195 6196 6197
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

6198 6199
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
6200

6201 6202 6203 6204
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
6205

6206
      (*tagCols)[i] = *pTagCol;
6207
      pMsg += sizeof(SColumnInfo);
6208
    }
H
hjxilinx 已提交
6209
  }
6210

6211 6212 6213
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
6214 6215 6216 6217 6218 6219

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
6220 6221 6222
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
6223

H
Haojun Liao 已提交
6224 6225
  if (pQueryMsg->tbnameCondLen > 0) {
    *tbnameCond = calloc(1, pQueryMsg->tbnameCondLen + 1);
6226 6227 6228 6229 6230
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

H
Haojun Liao 已提交
6231 6232
    strncpy(*tbnameCond, pMsg, pQueryMsg->tbnameCondLen);
    pMsg += pQueryMsg->tbnameCondLen;
6233 6234 6235 6236 6237 6238 6239
  }

  *sql = strndup(pMsg, pQueryMsg->sqlstrLen);

  if (!validateQuerySourceCols(pQueryMsg, *pExpr, *tagCols)) {
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
weixin_48148422's avatar
weixin_48148422 已提交
6240
  }
6241

6242
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
6243 6244
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
6245
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
6246
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
6247

6248
  qDebug("qmsg:%p, sql:%s", pQueryMsg, *sql);
6249
  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
6250 6251

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
6252
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
6253 6254
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
6255 6256 6257 6258
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
6259
  tfree(*sql);
6260 6261

  return code;
6262 6263
}

H
Haojun Liao 已提交
6264 6265
/*
 * Build the expression tree of an arithmetic expression from its binary form, which is carried
 * in the first argument (arg[0]) of the expression message.
 *
 * @param pArithExprInfo  expression info; pExpr receives the tree on success
 * @param pQueryMsg       query message, used for logging only
 * @return TSDB_CODE_SUCCESS, the code caught while building the tree, or TSDB_CODE_QRY_APP_ERROR
 *         when no tree has been produced
 */
static int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary", pQueryMsg);

  tExprNode* pRoot = NULL;

  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
6285
static int32_t createQueryFuncExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
6286 6287
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
6288
  int32_t code = TSDB_CODE_SUCCESS;
6289

H
Haojun Liao 已提交
6290
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
6291
  if (pExprs == NULL) {
6292
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
6293 6294 6295 6296 6297
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

H
Haojun Liao 已提交
6298
  for (int32_t i = 0; i < numOfOutput; ++i) {
6299
    pExprs[i].base = *pExprMsg[i];
6300
    pExprs[i].bytes = 0;
6301 6302 6303 6304

    int16_t type = 0;
    int16_t bytes = 0;

6305
    // parse the arithmetic expression
6306
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
6307
      code = buildArithmeticExprFromMsg(&pExprs[i], pQueryMsg);
6308

6309
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6310
        tfree(pExprs);
6311
        return code;
6312 6313
      }

6314
      type  = TSDB_DATA_TYPE_DOUBLE;
H
Haojun Liao 已提交
6315
      bytes = tDataTypes[type].bytes;
H
Haojun Liao 已提交
6316
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
6317
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6318
      type = s.type;
H
Haojun Liao 已提交
6319
      bytes = s.bytes;
6320 6321
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
6322 6323
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6324 6325
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6326 6327 6328 6329 6330

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6331
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6332 6333 6334 6335 6336 6337 6338 6339 6340
      if (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag)) {
        if (j < TSDB_TBNAME_COLUMN_INDEX || j >= pQueryMsg->numOfTags) {
          return TSDB_CODE_QRY_INVALID_MSG;
        }
      } else {
        if (j < PRIMARYKEY_TIMESTAMP_COL_INDEX || j >= pQueryMsg->numOfCols) {
          return TSDB_CODE_QRY_INVALID_MSG;
        }
      }
H
Haojun Liao 已提交
6341

dengyihao's avatar
dengyihao 已提交
6342
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6343 6344 6345 6346
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6347
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6348

H
Haojun Liao 已提交
6349 6350 6351
        type  = s.type;
        bytes = s.bytes;
      }
6352 6353
    }

S
TD-1057  
Shengliang Guan 已提交
6354
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6355
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6356
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6357
      tfree(pExprs);
6358
      return TSDB_CODE_QRY_INVALID_MSG;
6359 6360
    }

6361
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6362
      tagLen += pExprs[i].bytes;
6363
    }
6364

6365
    assert(isValidDataType(pExprs[i].type));
6366 6367 6368
  }

  // TODO refactor
H
Haojun Liao 已提交
6369
  for (int32_t i = 0; i < numOfOutput; ++i) {
6370 6371
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6372

6373
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6374
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6375 6376 6377 6378 6379 6380 6381 6382 6383
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6384 6385 6386
    }
  }

6387
  *pExprInfo = pExprs;
6388 6389 6390
  return TSDB_CODE_SUCCESS;
}

6391
/*
 * Create the group-by expression object of a query.
 *
 * @param pQueryMsg  query message (numOfGroupCols, orderType and orderByIdx are read)
 * @param pColIndex  array of pQueryMsg->numOfGroupCols group-by columns, copied into the result
 * @param code       set to TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure, untouched otherwise
 * @return the new object; NULL when the query has no group-by column or on allocation failure
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // never push into a missing array; report the failure like the allocation above
    tfree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6415
/*
 * Build pQuery->pFilterInfo from the filters attached to the source columns of the query.
 *
 * pQuery->numOfFilterCols is increased by one for every column that carries filters; one
 * SSingleColumnFilterInfo entry is then filled per such column, with a filter function resolved
 * for every single filter.
 *
 * @param pQInfo  query handle, used for logging only
 * @param pQuery  query whose colList is read and whose filter info is created
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY, or TSDB_CODE_QRY_INVALID_MSG when a
 *         filter has no valid relational operator or no filter function matches it
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfCols; ++col) {
    if (pQuery->colList[col].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  int32_t slot = 0;  // index of the next free entry in pQuery->pFilterInfo
  for (int32_t col = 0; col < pQuery->numOfCols; ++col) {
    if (pQuery->colList[col].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[slot];
    pFilterInfo->info = pQuery->colList[col];

    pFilterInfo->numOfFilters = pQuery->colList[col].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pElem = &pFilterInfo->pFilters[f];
      pElem->filterInfo = pQuery->colList[col].filters[f];

      int32_t lower = pElem->filterInfo.lowerRelOptr;
      int32_t upper = pElem->filterInfo.upperRelOptr;

      // at least one of the two bounds needs a valid relational operator
      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      pElem->fp = getFilterOperator(lower, upper);
      if (pElem->fp == NULL) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      pElem->bytes = pQuery->colList[col].bytes;
    }

    slot++;
  }

  return TSDB_CODE_SUCCESS;
}

6471
/**
 * Resolve, for every output expression, the position of the referenced column
 * inside the query's own column lists.
 *
 * For each entry of pQuery->pExpr1 (arithmetic expressions are skipped):
 *   - a normal column is looked up by colId in pQuery->colList,
 *   - a user-defined constant column (colId <= TSDB_UD_COLUMN_INDEX) is left untouched,
 *   - anything else is looked up by colId in pQuery->tagColList.
 * The matching position is stored in colInfo.colIndex of the expression.
 *
 * @param pQuery  query descriptor; must not be NULL and must carry a valid pExpr1 array
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before anything reached through it
  assert(pQuery != NULL && pQuery->pExpr1 != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pExpr1[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column referenced by an expression must exist in the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is the only id allowed to be absent from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6508 6509
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6510 6511 6512
/**
 * Decide how many result rows the output buffer holds (rec.capacity) and at which
 * fill level the buffer is considered full enough (rec.threshold).
 *
 * Projection queries get room for at least MIN_ROWS rows, or as many rows as fit
 * in MIN_BYTES when that is more; every other query uses a fixed 4096-row buffer.
 * The threshold is 85% of the capacity in both cases.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t MIN_BYTES   = 1024 * (1024 + 512);  // bytes
  const int32_t MIN_ROWS    = 8192;
  const float   FILL_RATIO  = 0.85f;

  // in case of non-prj query, a smaller output buffer will be used.
  int32_t rows = 4096;

  if (isProjQuery(pQuery)) {
    rows = MIN_BYTES / pQuery->rowSize;
    rows = (rows < MIN_ROWS) ? MIN_ROWS : rows;
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * FILL_RATIO);
}

6529
/**
 * Allocate a SQInfo together with its SQuery and fill both from the decoded query message.
 *
 * Ownership: the function takes over pGroupbyExpr, pExprs, pSecExprs, pTagCols, sql and the
 * content of *pTableGroupInfo. On success they are reachable through the returned SQInfo and
 * are released by freeQInfo(). On failure they are released here, either directly (when the
 * SQInfo/SQuery could not be allocated) or through freeQInfo().
 *
 * @param pQueryMsg        decoded query message
 * @param pGroupbyExpr     group-by description, may be NULL
 * @param pExprs           first-stage output expressions (numOfOutput entries)
 * @param pSecExprs        second-stage output expressions (secondStageOutput entries), may be NULL
 * @param pTableGroupInfo  tables taking part in the query, grouped
 * @param pTagCols         tag column descriptions
 * @param stableQuery      forwarded to changeExecuteScanOrder()
 * @param sql              sql text attached to the query handle
 * @return the new query handle, or NULL on failure
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery, char* sql) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  // attach the sql text immediately, so that freeQInfo() releases it on every later failure path
  pQInfo->sql = sql;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pExpr1          = pExprs;
  pQuery->pExpr2          = pSecExprs;
  pQuery->numOfExpr2      = pQueryMsg->secondStageOutput;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;
  pQuery->prjInfo.vgroupLimit = pQueryMsg->vgroupLimit;
  pQuery->prjInfo.ts      = (pQueryMsg->order == TSDB_ORDER_ASC)? INT64_MIN:INT64_MAX;

  // colList holds SColumnInfo entries, size the array by its real element type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  // copy the column descriptions (filters are deep-copied) and sum up the source row size
  int32_t srcSize = 0;
  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
    srcSize += pQuery->colList[i].bytes;
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    // TODO refactor
    int16_t bytes = 0;
    // pExpr2 has numOfExpr2 entries, so the last valid index is numOfExpr2 - 1
    if (pQuery->pExpr2 == NULL || col >= pQuery->numOfExpr2) {
      bytes = pExprs[col].bytes;
    } else {
      bytes = MAX(pQuery->pExpr2[col].bytes, pExprs[col].bytes);
    }

    size_t size = (size_t)((pQuery->rec.capacity + 1) * bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  if (pQInfo->runtimeEnv.keyBuf == NULL) {
    goto _cleanup;
  }

  pQInfo->runtimeEnv.pool = initResultRowPool(getResultRowSize(&pQInfo->runtimeEnv));

  // prevRow layout: numOfCols pointers, followed by one value slot per column
  size_t prevRowBytes = POINTER_BYTES * pQuery->numOfCols + srcSize;
  pQInfo->runtimeEnv.prevRow = malloc(prevRowBytes);
  if (pQInfo->runtimeEnv.prevRow == NULL && prevRowBytes > 0) {
    goto _cleanup;
  }

  // without columns there is no pointer slot to initialize
  if (pQuery->numOfCols > 0) {
    char* start = POINTER_BYTES * pQuery->numOfCols + (char*) pQInfo->runtimeEnv.prevRow;
    pQInfo->runtimeEnv.prevRow[0] = start;

    for(int32_t i = 1; i < pQuery->numOfCols; ++i) {
      pQInfo->runtimeEnv.prevRow[i] = pQInfo->runtimeEnv.prevRow[i - 1] + pQuery->colList[i-1].bytes;
    }
  }

  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  pQInfo->runtimeEnv.queryWindowIdentical = true;
  STimeWindow window = pQuery->window;

  // build one STableQueryInfo per table inside pBuf, grouped exactly like tableGroupInfo
  int32_t index = 0;
  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      window.skey = info->lastKey;
      if (info->lastKey != pQuery->window.skey) {
        pQInfo->runtimeEnv.queryWindowIdentical = false;
      }

      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // no SQInfo exists: nothing else will release the table group or the sql text
  tsdbDestroyTableGroup(pTableGroupInfo);
  free(sql);

_cleanup_query:
  // no SQuery exists: the objects that would have been attached to it are released here
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

  if (pSecExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      SExprInfo* pExprInfo = &pSecExprs[i];
      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    tfree(pSecExprs);
  }

_cleanup:
  // freeQInfo() ignores a NULL handle and releases everything reachable from a valid one
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6736
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6737 6738 6739 6740
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6741

H
hjxilinx 已提交
6742 6743 6744 6745
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6746
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6747 6748 6749
  return (sig == (uint64_t)pQInfo);
}

6750
/**
 * Finish the initialization of a freshly created query handle.
 *
 * The query is marked QUERY_COMPLETED (and TSDB_CODE_SUCCESS is returned) when the time
 * window is empty for the scan order or when no table takes part in the query. Otherwise
 * the optional timestamp buffer carried by the message is built and doInitQInfo() is run.
 *
 * @param pQueryMsg  query message; tsLen > 0 means it carries timestamp-comp blocks at tsOffset
 * @param tsdb       tsdb handle, also used to read the precision
 * @param vgId       vnode group id
 * @param pQInfo     query handle to initialize; it is freed here when doInitQInfo() fails
 * @param isSTable   forwarded to doInitQInfo()
 * @return TSDB_CODE_SUCCESS or the error code reported by doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is built only after the early exits above: nothing on those paths
  // takes it over, so creating it earlier would leak it.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);

    UNUSED(ret);
  }

  // filter the qualified
  code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decrease during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

B
Bomin Zhang 已提交
6795
/**
 * Release an array of column filters, including the string payload (pz) of every
 * filter flagged as a string filter (filterstr).
 *
 * Nothing is done when the array is NULL or numOfFilters is 0.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
    if (pFilter == NULL || numOfFilters == 0) {
      return;
    }

    int32_t idx = 0;
    while (idx < numOfFilters) {
      SColumnFilterInfo* pCur = &pFilter[idx];

      // only string filters own the buffer that pz refers to
      if (pCur->filterstr) {
        free((void*)(pCur->pz));
      }

      idx += 1;
    }

    free(pFilter);
}

H
Haojun Liao 已提交
6809 6810
/**
 * Destroy every STableQueryInfo referenced from the group list, then the per-group
 * arrays, the group list itself and the lookup map, and reset the structure to empty.
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *pGroups = pTableqinfoGroupInfo->pGroupList;

  // the size is only queried when a group list exists
  int32_t groupCount = (pGroups != NULL) ? (int32_t) taosArrayGetSize(pGroups) : 0;

  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pTables = taosArrayGetP(pGroups, g);
    size_t  tableCount = taosArrayGetSize(pTables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo* pTableInfo = taosArrayGetP(pTables, t);
      destroyTableQueryInfoImpl(pTableInfo);
    }

    taosArrayDestroy(pTables);
  }

  taosArrayDestroy(pGroups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
Haojun Liao 已提交
6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848
/**
 * Destroy the expression node attached to each entry of an expression array and
 * free the array itself.
 *
 * @param pExprInfo  expression array, may be NULL (numOfExpr is then expected to be 0)
 * @param numOfExpr  number of entries in pExprInfo
 * @return always NULL, so that the caller can reset its pointer in the same statement
 */
static void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr) {
  if (pExprInfo != NULL) {
    for (int32_t idx = 0; idx < numOfExpr; ++idx) {
      SExprInfo* pCur = &pExprInfo[idx];
      if (pCur->pExpr == NULL) {
        continue;
      }

      tExprNodeDestroy(pCur->pExpr, NULL);
    }

    tfree(pExprInfo);
    return NULL;
  }

  assert(numOfExpr == 0);
  return NULL;
}

H
hjxilinx 已提交
6849 6850 6851 6852
/**
 * Release a query handle and everything it owns.
 *
 * The call is ignored for a NULL or invalid handle (see isValidQInfo()). For a valid
 * handle the reserved query buffer is given back, the runtime environment is torn down,
 * the SQuery with its buffers, filters, expressions and column lists is freed, and
 * finally the table information, the sql text and the handle itself.
 *
 * The function is also used on partially constructed handles, so every member is
 * checked (or is safe to release) when it was never set.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        tfree(pQuery->sdata[col]);
      }
      tfree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      tfree(pQuery->fillVal);
    }

    // numOfFilterCols is counted before pFilterInfo is allocated, so the array can
    // still be NULL here when that allocation failed
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          tfree(pColFilter->pFilters);
        }
      }
    }

    pQuery->pExpr1 = destroyQueryFuncExpr(pQuery->pExpr1, pQuery->numOfOutput);
    pQuery->pExpr2 = destroyQueryFuncExpr(pQuery->pExpr2, pQuery->numOfExpr2);

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      tfree(pQuery->colList);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tfree(pQInfo->sql);

  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosHashCleanup(pQInfo->arrTableIdInfo);

  taosArrayDestroy(pQInfo->groupResInfo.pRows);

  // invalidate the handle before the memory goes away, see isValidQInfo()
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
6919
/**
 * Compute the payload size of the current result.
 *
 * Regular queries: rowSize * (*numOfRows).
 * TS-comp queries with at least one row: the size of the result file referenced from
 * the first output buffer; *numOfRows is overwritten with that size as well, since the
 * returned row size is 1 for this query type. 0 is returned when the file is missing
 * or cannot be inspected.
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  FILE *pFile = *(FILE **)pQuery->sdata[0]->data;

  struct stat fileStat;
  if ((pFile == NULL) || (fstat(fileno(pFile), &fileStat) != 0)) {
    qError("QInfo:%p failed to get file info, file:%p, reason:%s", pQInfo, pFile, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6941

H
hjxilinx 已提交
6942 6943 6944
/**
 * Copy the current result of the query into the response buffer and update the
 * row accounting.
 *
 * TS-comp queries: the whole result file referenced from the first output buffer is
 * read into data, the file is closed and the reference is cleared; a completed query
 * is then marked QUERY_OVER.
 * All other queries: the rows are copied by doCopyQueryResultToMsg().
 *
 * Afterwards rec.total is increased by rec.rows and the query is marked QUERY_OVER
 * once the limit has been reached exactly.
 *
 * @param pQInfo  query handle
 * @param data    destination buffer, sized by the caller
 * @return TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    FILE *f = *(FILE **)pQuery->sdata[0]->data;  // TODO refactor

    // make sure file exist
    if (f) {
      off_t s = lseek(fileno(f), 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%p, size:%"PRId64, pQInfo, f, (int64_t)s);

      if (s < 0) {
        // lseek failed: a negative size must never reach fread() as a byte count
        qError("QInfo:%p failed to get size of ts comp file:%p, reason:%s", pQInfo, f, strerror(errno));
      } else if (fseek(f, 0, SEEK_SET) >= 0) {
        size_t sz = fread(data, 1, (size_t)s, f);
        if (sz < (size_t)s) {  // todo handle error
          assert(0);
        }
      }

      fclose(f);
      *(FILE **)pQuery->sdata[0]->data = NULL;
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6988 6989 6990 6991 6992 6993 6994
/*
 * Book-keeping for a pool of query handles.
 * NOTE(review): the users of this structure are outside this part of the file; the
 * member notes below other than qinfoPool are read from the names only - confirm.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vnode group the pool belongs to
  bool            closed;         // presumably set once the manager has been shut down
  pthread_mutex_t lock;           // presumably guards the members above
} SQueryMgmt;

6995
/**
 * Build a query handle from a query message received for one vnode.
 *
 * Steps: decode the message, build the output and group-by expressions, collect the
 * tables taking part in the query, reserve query buffer for them, create the SQInfo
 * and initialize it.
 *
 * @param tsdb       tsdb handle, must not be NULL
 * @param vgId       vnode group id
 * @param pQueryMsg  query message, must not be NULL; the filters of its column list are
 *                   released before returning
 * @param pQInfo     [out] receives the new handle; set to NULL when an error code is returned
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *sql          = NULL;
  char            *tagCond      = NULL;
  char            *tbnameCond   = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg     = NULL;
  SSqlFuncMsg    **pSecExprMsg  = NULL;
  SExprInfo       *pExprs       = NULL;
  SExprInfo       *pSecExprs    = NULL;

  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &pSecExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo, &sql);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->numOfOutput, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pSecExprMsg != NULL) {
    if ((code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &pSecExprs, pSecExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    // never leave through the error path with a success code: *pQInfo is not set yet
    if (code == TSDB_CODE_SUCCESS) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    // the query type comes from the message: reject it instead of relying on an assert
    qError("qmsg:%p invalid query type:%d", pQueryMsg, (int32_t)pQueryMsg->queryType);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    // the table group has not been handed to a query handle yet, release it here
    tsdbDestroyTableGroup(&tableGroupInfo);
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, pSecExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery, sql);

  // createQInfoImpl() took over these objects, whether it succeeded or not
  sql    = NULL;
  pExprs = NULL;
  pSecExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);
  free(sql);
  free(pExprs);
  free(pSecExprs);

  free(pExprMsg);
  free(pSecExprMsg);

  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // initQInfo() frees the handle when it fails, but *pQInfo would still hold the stale address
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
7146
/**
 * Public entry point to dispose of a query handle: prints the cost summary of the
 * query and frees the handle. NULL or invalid handles are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);

    // print the query cost summary
    queryCostStatis(pQInfo);

    freeQInfo(pQInfo);
  }
}

7157 7158 7159 7160 7161 7162
/**
 * Publish the end of one execution round of a query.
 *
 * While holding pQInfo->lock: the result is flagged QUERY_RESULT_READY, it is decided
 * whether the response has to be built right away, and the executing thread gives up
 * ownership of the handle. After the lock is released the "ready" semaphore is posted.
 *
 * @return the value of needBuildResAfterQueryComplete() taken under the lock
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  const bool needBuild = needBuildResAfterQueryComplete(pQInfo);

  // The owner must be cleared inside the critical section: once the handle is put
  // into a task, another thread may start running it ahead of the current one.
  assert(pQInfo->owner == taosGetSelfPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  // used in retrieve blocking model.
  tsem_post(&pQInfo->ready);

  return needBuild;
}

7177
/**
 * Execute one round of the query bound to the given handle on the calling thread.
 *
 * The handle is claimed with a compare-and-swap on pQInfo->owner. If another
 * thread already owns it, the code is set to TSDB_CODE_QRY_IN_EXEC and false is
 * returned without running anything. Every other exit goes through
 * doBuildResCheck(), which marks the result ready and clears the owner.
 *
 * A non-zero value coming back from setjmp() (i.e. a longjmp out of the
 * execution routines) is stored as the query's error code.
 *
 * @param qinfo opaque query handle (an SQInfo pointer, signature asserted)
 * @return false if the handle is being executed by another thread, otherwise
 *         the result of doBuildResCheck()
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  // Try to become the owner of the handle; a non-zero previous owner means it is busy.
  int64_t self      = taosGetSelfPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  pQInfo->startExecTs = taosGetTimestampSec();

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // Landing point for errors raised during execution: record the code and hand it to the client.
  int32_t jmpCode = setjmp(pQInfo->runtimeEnv.env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  // Dispatch to the matching execution routine.
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  // Report how this round ended.
  SQuery* pQuery = pEnv->pQuery;
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

7235
/**
 * Check whether a result is available for a retrieve request.
 *
 * In the blocking model (tsRetrieveBlockingModel set) the response context is
 * stored and the call waits on pQInfo->ready; *buildRes is then always true.
 * Otherwise, under pQInfo->lock, *buildRes is true when the data is already
 * marked ready; if not, the response context is stored on the handle and
 * *buildRes stays false.
 *
 * @param qinfo       opaque query handle (an SQInfo pointer)
 * @param buildRes    out: whether the caller should build the response now
 * @param pRspContext response context to remember on the handle
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the
 *         query's current code
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // Blocking model: park on the semaphore until the executing thread posts it.
  if (tsRetrieveBlockingModel) {
    pQInfo->rspContext = pRspContext;
    tsem_wait(&pQInfo->ready);
    *buildRes = true;
    return pQInfo->code;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);

  assert(pQInfo->rspContext == NULL);

  const bool ready = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = ready;

  if (ready) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%s", pQInfo, pQuery->rowSize,
           pQuery->rec.rows, tstrerror(pQInfo->code));
  } else {
    // Nothing to return yet: keep the context so the response can be produced later.
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  }

  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
7279

7280
/**
 * Allocate and fill the retrieve response for the current result set.
 *
 * The response buffer is sized as: result size reported by getResultSize(),
 * plus one int32_t, plus one STableIdInfo per entry of pQInfo->arrTableIdInfo,
 * plus the SRetrieveTableRsp header. Rows are dumped only when there are rows
 * and the query code is TSDB_CODE_SUCCESS; otherwise the query is marked
 * QUERY_OVER.
 *
 * @param qinfo        opaque query handle (an SQInfo pointer)
 * @param pRsp         out: response allocated with rpcMallocCont()
 * @param contLen      out: total length of the response
 * @param continueExec out: false when the query is killed or over, true otherwise
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or the
 *         query's current code
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  size_t payload = getResultSize(pQInfo, &pQuery->rec.rows);
  payload += sizeof(int32_t);
  payload += sizeof(STableIdInfo) * taosHashGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payload + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  *pRsp = rsp;
  if (rsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // The offset is only reported for a successful query; the elapsed time always is.
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    rsp->offset = htobe64(pQuery->limit.offset);
  } else {
    rsp->offset = 0;
  }
  rsp->useconds  = htobe64(pRuntimeEnv->summary.elapsedTime);
  rsp->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  // Reset the per-retrieve state on the handle.
  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    // here current thread hold the refcount, so it is safe to free tsdbQueryHandle.
    *continueExec = false;
    rsp->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results to retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
7334

7335 7336 7337 7338 7339 7340 7341 7342
/**
 * Tell whether the query behind the handle has finished.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1 when
 *         the query is killed or its status contains QUERY_OVER, else 0.
 *         NOTE(review): the error code and the boolean share one return value;
 *         callers presumably treat any non-zero value as "completed" - confirm.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (isQueryKilled(pQInfo)) {
    return 1;
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  return Q_STATUS_EQUAL(pQuery->status, QUERY_OVER) ? 1 : 0;
}

H
Haojun Liao 已提交
7346
/**
 * Flag a query as killed and wait until no thread owns the handle any more.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise
 *         TSDB_CODE_SUCCESS once pQInfo->owner has been observed as 0
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the query executing thread to stop. Once the query is stopped,
  // the owner of the qHandle is cleared immediately; poll for that every 100 ms.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }
    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379
/**
 * Write one tag value (or its NULL representation) into a result buffer slot.
 *
 * BINARY/NCHAR values are copied with their full var-data length
 * (varDataTLen); every other type copies exactly `bytes` bytes.
 *
 * @param output destination slot in the result buffer
 * @param val    source value, or NULL to store the type's NULL representation
 * @param type   TSDB data type of the value
 * @param bytes  size of the value for non-variable-length types
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  const bool isVarLen = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarLen) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarLen) {
    memcpy(output, val, varDataTLen(val));
  } else {
    // todo (kept from the original author): here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7380 7381 7382
/**
 * Build the result of a query that only touches tags / table names.
 *
 * Depending on the function id of the first output expression:
 *  - TSDB_FUNC_TID_TAG: for every table emit one var-data row holding an int16
 *    zero, the table uid, the table tid, the vgId, and then the table name or
 *    the requested tag value;
 *  - TSDB_FUNC_COUNT:   emit a single int64 row with the number of tables
 *    ("count(tbname)");
 *  - otherwise:         emit the requested tag / tbname columns per table,
 *    honouring limit.offset and limit.limit.
 *
 * Iteration resumes from pQInfo->tableIndex. On return pQuery->rec.rows holds
 * the number of rows produced and the query status is set to QUERY_COMPLETED.
 * Nothing happens when there is no table group.
 *
 * The fixed-width fields are written with memcpy rather than through casted
 * pointers: the destination is a byte offset into a char buffer and carries no
 * alignment guarantee, so typed stores there are undefined behaviour and trap
 * on strict-alignment targets. The bytes written are unchanged.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pExpr1[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pExpr1[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // Default to the expression's own type/size; prefer the tag column definition when it is listed.
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // Serialize the fixed header byte-wise; `output` may be misaligned for every one of these types.
      const int16_t zero16 = 0;
      memcpy(output, &zero16, sizeof(zero16));
      output += sizeof(int16_t);

      const int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      const int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      const int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    const int64_t numOfTables = (int64_t)num;
    memcpy(pQuery->sdata[0]->data, &numOfTables, sizeof(numOfTables));

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // Never produce more rows than the buffer capacity or the LIMIT, whichever is smaller.
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pExpr1;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        char *data = NULL, *dst = NULL;
        int16_t type = 0, bytes = 0;

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7513
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7514 7515 7516 7517
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7518
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7519 7520
}

H
Haojun Liao 已提交
7521
/**
 * Reserve query buffer for a query over `numOfTables` tables.
 *
 * tsQueryBufferSize < 0 means no accounting is done and the call always
 * succeeds. A value of 0 disables query processing. For a positive value the
 * estimated size is subtracted with a compare-and-swap retry loop, failing as
 * soon as the remaining amount would become negative.
 *
 * @param numOfTables number of tables involved in the query
 * @return TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t need = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  if (tsQueryBufferSize > 0) {
    for (;;) {
      int64_t avail = tsQueryBufferSize;
      int64_t left  = avail - need;

      if (left < 0) {
        return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
      }

      // Retry if another thread changed the pool between the read and the swap.
      if (atomic_val_compare_exchange_64(&tsQueryBufferSize, avail, left) == avail) {
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
}

H
Haojun Liao 已提交
7544
/**
 * Give back the query buffer reserved by checkForQueryBuf() for a query over
 * `numOfTables` tables.
 *
 * Only a negative tsQueryBufferSize (accounting disabled) skips the release.
 * The pool legitimately reaches exactly 0 when a reservation consumes all the
 * remaining buffer (checkForQueryBuf() accepts `remain >= 0`). Skipping the
 * release in that state would leave the pool stuck at 0 and reject every later
 * query with TSDB_CODE_QRY_NOT_ENOUGH_BUFFER.
 *
 * NOTE(review): this assumes the function is only called for queries whose
 * reservation succeeded; with a configured size of 0 no reservation can
 * succeed - confirm against the callers.
 *
 * @param numOfTables number of tables the reservation was made for
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize < 0) {
    return;
  }

  int64_t t = getQuerySupportBufSize(numOfTables);

  // restore the amount that was taken out of the available buffer
  atomic_add_fetch_64(&tsQueryBufferSize, t);
}

7555 7556 7557 7558 7559 7560 7561
/**
 * Return the response context currently stored on the query handle.
 *
 * @param qinfo opaque query handle (an SQInfo pointer, must not be NULL)
 * @return the handle's rspContext field
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;

  assert(pInfo != NULL);
  return pInfo->rspContext;
}

7562 7563 7564 7565 7566 7567 7568
/**
 * Free callback handed to the qhandle cache: kill the query stored in the slot
 * and then destroy it.
 *
 * @param qhandle pointer to the slot holding the query handle; a NULL slot
 *                pointer or a slot containing NULL is ignored
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle != NULL && *ppHandle != NULL) {
    qKillQuery(*ppHandle);
    qDestroyQueryInfo(*ppHandle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7573
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7574 7575 7576 7577

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7578
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7579 7580 7581 7582
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7583

S
TD-1530  
Shengliang Guan 已提交
7584
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7585 7586 7587 7588
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7589 7590

  qDebug("vgId:%d, open querymgmt success", vgId);
7591
  return pQueryMgmt;
7592 7593
}

H
Haojun Liao 已提交
7594
/**
 * Callback passed to taosCacheRefresh(): kill the query stored in a cache slot.
 *
 * @param handle pointer to the slot holding the query handle
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** ppHandle = (void**)handle;

  qKillQuery(*ppHandle);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7600 7601 7602 7603 7604 7605 7606
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

7607
  pthread_mutex_lock(&pQueryMgmt->lock);
7608
  pQueryMgmt->closed = true;
7609
  pthread_mutex_unlock(&pQueryMgmt->lock);
7610

H
Haojun Liao 已提交
7611
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7612 7613
}

S
TD-2640  
Shengliang Guan 已提交
7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626
/**
 * Re-open a query management object by clearing its `closed` flag under the
 * management lock.
 *
 * @param pQMgmt query management object (SQueryMgmt); NULL is ignored
 */
void qQueryMgmtReOpen(void *pQMgmt) {
  SQueryMgmt *pMgmt = pQMgmt;

  if (pMgmt == NULL) {
    return;
  }

  qDebug("vgId:%d, set querymgmt reopen", pMgmt->vgId);

  pthread_mutex_lock(&pMgmt->lock);
  pMgmt->closed = false;
  pthread_mutex_unlock(&pMgmt->lock);
}

7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641
/**
 * Destroy a query management object that has already been marked closed.
 *
 * The cache pointer is detached from the object before the cache is cleaned
 * up; then the mutex is destroyed and the object itself is freed.
 *
 * @param pQMgmt query management object (SQueryMgmt); NULL is ignored.
 *               `closed` must already be set (asserted).
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;

  if (pMgmt == NULL) {
    return;
  }

  assert(pMgmt->closed);

  // Remember the id now; it is still needed for the log line after the object is freed.
  int32_t vgId = pMgmt->vgId;

  // Detach the pool first so the management object no longer exposes it.
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  tfree(pMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}

7647
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7648
  if (pMgmt == NULL) {
7649
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7650 7651 7652 7653 7654
    return NULL;
  }

  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7655
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7656
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7657 7658 7659
    return NULL;
  }

7660
  pthread_mutex_lock(&pQueryMgmt->lock);
7661
  if (pQueryMgmt->closed) {
7662
    pthread_mutex_unlock(&pQueryMgmt->lock);
7663
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7664
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7665 7666
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7667
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
7668 7669
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE),
        (getMaximumIdleDurationSec()*1000));
7670
    pthread_mutex_unlock(&pQueryMgmt->lock);
7671 7672 7673 7674 7675

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7676
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7677 7678
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7679 7680 7681 7682 7683 7684 7685
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7686 7687 7688
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7689 7690
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7691
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7692
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7693 7694 7695 7696 7697 7698
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7699
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7700 7701 7702 7703 7704
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7705
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7706
  return 0;
D
fix bug  
dapan1121 已提交
7707
}