qExecutor.c 265.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30
#include "ttype.h"
31

H
Haojun Liao 已提交
32
// Maximum number of rows in one result-buffer page: 2^12 - 1.
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 * NOTE(review): none of the macros below appears to implement this check --
 * confirm whether this comment is stale.
 */

// Non-zero when at least one bit of mask `s` is set in `p`.
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0u)

// True when the scan order of query `q` maps to the ascending forward step.
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

// Accessors for the scan flag kept in the runtime environment.
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

// Recover the enclosing SQInfo from a pointer to its embedded `runtimeEnv` member.
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

// Position reached from (query)->pos after `index` moves of `step`.
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

// Flip `n` between TSDB_ORDER_ASC and TSDB_ORDER_DESC in place. NOTE: `n` is evaluated twice.
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

// Zero-valued SDataBlockInfo compound literal.
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

// Copy both bounds (skey, ekey) of time window `_src` into `_dst`.
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0)
S
TD-1057  
Shengliang Guan 已提交
57

58
/* Query status bits; tested with Q_STATUS_EQUAL() and cleared with CLEAR_QUERY_STATUS(). */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 1u << 0,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 1u << 1,

  /*
   * The query is over:
   * 1. used by queries that produce a single result row, e.g. count/sum/first/last/avg;
   * 2. also set once all data within the queried time window has been processed.
   */
  QUERY_COMPLETED = 1u << 2,

  /*
   * The results have not all been returned to the client yet; usually used for
   * interval queries with the interpolation option.
   */
  QUERY_OVER = 1u << 3,
};
78 79

// Outcome codes for a timestamp join comparison (meaning presumed from the names -- confirm at the use sites).
enum {
  TS_JOIN_TS_EQUAL = 0,
  TS_JOIN_TS_NOT_EQUALS,   // == 1
  TS_JOIN_TAG_NOT_EQUALS,  // == 2
};

85
typedef struct {
86 87 88 89 90 91
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
92 93
} SQueryStatusInfo;

H
Haojun Liao 已提交
94
#if 0
/*
 * Disabled fault-injection allocators: each wrapper randomly returns NULL
 * instead of forwarding to the real allocator. The #defines at the bottom
 * redirect malloc/calloc/realloc in the rest of the file to these wrappers.
 */

// Returns NULL when rand() is a multiple of 1000, otherwise malloc(__size).
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : malloc(__size);
}

// Returns NULL when rand() is a multiple of 1000, otherwise calloc(num, __size).
static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  return (v % 1000 == 0) ? NULL : calloc(num, __size);
}

// Returns NULL when rand() % 5 is 0 or 1, otherwise realloc(p, __size).
static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t v = rand();
  return (v % 5 < 2) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
127

128
// Clear the status bits `st` in (q)->status.
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))

// Number of table groups held in (q)->tableqinfoGroupInfo.pGroupList.
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)

// The `_index`-th table group, as an SArray*.
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

// A query is treated as an interval query when its interval length is positive.
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
H
Haojun Liao 已提交
132

133
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
134
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
135

136 137 138
// Returns twice tsShellActivityTimer (the name suggests the unit is seconds).
static int32_t getMaximumIdleDurationSec() {
  int32_t idleDuration = tsShellActivityTimer * 2;
  return idleDuration;
}
139

140 141
/**
 * Advance the time window `tw` in place to the next window in scan direction.
 *
 * For interval units other than 'n' and 'y' the window start moves by one
 * sliding step and the end is start + interval - 1. For 'n'/'y' the step is
 * done on the local calendar in whole months ('y' counts as 12 months).
 *
 * @param pQuery  supplies scan order, interval settings and time precision
 * @param tw      window to advance; both skey and ekey are overwritten
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  bool calendarUnit = (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y');
  if (!calendarUnit) {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  bool microPrecision = (pQuery->precision == TSDB_TIME_PRECISION_MICRO);

  // number of months to move: years are converted to months
  int64_t months = pQuery->interval.interval;
  if (pQuery->interval.intervalUnit == 'y') {
    months *= 12;
  }

  // reduce the window start to whole seconds for the calendar conversion
  int64_t seconds = tw->skey / 1000;
  if (microPrecision) {
    seconds /= 1000;
  }

  struct tm tm;
  time_t    t = (time_t)seconds;
  localtime_r(&t, &tm);

  // start of the next window; mktime() also normalises `tm`, which the second call relies on
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + months * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon  = mon % 12;
  tw->skey = mktime(&tm) * 1000L;

  // first instant after the next window
  mon = (int)(mon + months);
  tm.tm_year = mon / 12;
  tm.tm_mon  = mon % 12;
  tw->ekey = mktime(&tm) * 1000L;

  if (microPrecision) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }

  // the window end is inclusive
  tw->ekey -= 1;
}

177 178
// Mark a super-table query as finished by moving tableIndex to the number of tables.
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
// True once tableIndex has reached (or passed) the number of tables.
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
179

H
hjxilinx 已提交
180
// todo move to utility
181
static int32_t mergeIntoGroupResultImpl(SGroupResInfo* pGroupResInfo, SArray *pTableList, SQInfo* pQInfo);
182

183
static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
184
static void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
185
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
186

187
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
188
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
189

190
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
191
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
192
static void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
193
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
194
static void buildTagQueryResult(SQInfo *pQInfo);
195

196
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
197 198
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
199
static int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order);
H
Haojun Liao 已提交
200
static void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey, int32_t type);
H
Haojun Liao 已提交
201
static STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win);
202
static STableIdInfo createTableIdInfo(SQuery* pQuery);
203

204
/**
 * Evaluate all column filters of the query against one row.
 *
 * A column passes when at least one of its filter elements accepts the value;
 * the row passes only when every filtered column passes.
 *
 * @param pQuery   query holding the per-column filter information
 * @param elemPos  row position inside each filter column's data buffer
 * @return true when the row satisfies the filters of every column
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = (char*)pColFilter->pData + pColFilter->info.bytes * elemPos;

    bool matched = false;
    for (int32_t f = 0; f < pColFilter->numOfFilters && !matched; ++f) {
      SColumnFilterElem *pElem = &pColFilter->pFilters[f];

      if (isNull(pValue, pColFilter->info.type)) {
        // a NULL value can only satisfy the "is null" filter
        matched = (pElem->fp == isNull_filter);
      } else if (pElem->fp == notNull_filter) {
        matched = true;
      } else if (pElem->fp != isNull_filter) {
        // ordinary comparison filter; "is null" can never accept a non-NULL value
        if (pElem->fp(pElem, pValue, pValue)) {
          matched = true;
        }
      }
    }

    if (!matched) {
      return false;
    }
  }

  return true;
}

/**
 * Largest number of results produced by any output function of the query.
 *
 * When the query has a main output, the ts/tag/tagprj functions are ignored,
 * because they cannot decide the number of output rows.
 *
 * @return the maximum numOfRes over the considered output contexts (>= 0)
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  const bool mainOutputExists = hasMainOutput(pQuery);

  int64_t numOfRes = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pCell == NULL) {
      continue;
    }

    if (numOfRes < pCell->numOfRes) {
      numOfRes = pCell->numOfRes;
    }
  }

  assert(numOfRes >= 0);
  return numOfRes;
}

272 273 274 275 276
/*
 * The number of results has to be updated after the offset value was applied:
 * overwrite numOfRes of every output context with `numOfRes`, except for the
 * ts / tag / tagprj / ts_dummy functions, which are left untouched.
 * The previous value of each updated context must be larger than `numOfRes`.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    int16_t fid = pCtx->functionId;
    bool    skipped = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ ||
                       fid == TSDB_FUNC_TS_DUMMY);
    if (skipped) {
      continue;
    }

    SResultRowCellInfo *pCell = GET_RES_INFO(pCtx);
    assert(pCell->numOfRes > numOfRes);
    pCell->numOfRes = numOfRes;
  }
}

292
// Map a group index to a merge-result group id: 50000000 + groupIndex * 10000.
static UNUSED_FUNC int32_t getMergeResultGroupId(int32_t groupIndex) {
  const int32_t firstId = 50000000;
  const int32_t idsPerGroup = 10000;
  return firstId + (groupIndex * idsPerGroup);
}

/**
 * Tell whether the group-by expression contains a normal (non-tag) column.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return true when at least one group-by column is a normal column
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  int32_t numOfCols = pGroupbyExpr->numOfGroupCols;
  if (numOfCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < numOfCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // make sure the normal column is located at the second position if tbname exists in the group by clause
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pCol->colIndex > 0);
    }

    return true;
  }

  return false;
}

/**
 * Data type of the first normal column used in the group-by expression.
 *
 * @param pQuery        query whose colList is searched for the column id
 * @param pGroupbyExpr  group-by expression, must not be NULL
 * @return the column's type, or TSDB_DATA_TYPE_NULL when no matching column is found in colList
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 is kept as the "no normal column found" marker
  int32_t groupColId = -2;
  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == groupColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/**
 * True when the query outputs at least one selectivity function together with
 * at least one tag_dummy / ts_dummy output.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCount = 0;
  bool    dummyOutputSeen = false;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      dummyOutputSeen = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0 && dummyOutputSeen);
}

364 365
// True when every output of the query is a projection (prj) or tag projection (tagprj).
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    bool    projection = (fid == TSDB_FUNC_PRJ || fid == TSDB_FUNC_TAGPRJ);
    if (!projection) {
      return false;
    }
    ++idx;
  }

  return true;
}

H
Haojun Liao 已提交
375
// True when the first output expression of the query is the ts_comp function.
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
376

377 378 379
/**
 * Enforce the LIMIT clause on the rows produced so far.
 *
 * When the rows already returned plus the current batch exceed the limit, the
 * current batch is truncated to the remaining quota and the query is marked
 * QUERY_COMPLETED.
 *
 * @return true when the batch was truncated, false when no limit applies or the limit is not exceeded
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  if (pQuery->limit.limit <= 0) {
    return false;
  }

  if (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  // keep only as many rows as the limit still allows
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

// True when any output function, other than ts, is top or bottom.
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;

    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

409 410 411 412 413 414 415 416 417 418 419
// Time-window interpolation is required when the query outputs the twa function.
static bool timeWindowInterpoRequired(SQuery *pQuery) {
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    if (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_TWA) {
      return true;
    }
    ++idx;
  }

  return false;
}

H
Haojun Liao 已提交
420
/**
 * True when the query needs tag values in its output: either it is a single
 * ts_comp output (which requires the tag value for the join filter), or at
 * least one output expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirst = &pQuery->pExpr1[0];

  // ts_comp column requires the tag value for the join filter
  if (pQuery->numOfOutput == 1 && pFirst->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // tag value by which the results are aggregated
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (TSDB_COL_IS_TAG(pQuery->pExpr1[i].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

438 439 440 441 442 443 444 445
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
446
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
447
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
448 449
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
450 451
  } else {
    *pColStatis = NULL;
452
  }
453

H
Haojun Liao 已提交
454
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
455 456 457
    return false;
  }

458 459 460
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
461

462 463 464
  return true;
}

H
Haojun Liao 已提交
465
/**
 * Find -- or create -- the result row identified by (pData, bytes, uid) and make it
 * the current row of `pResultRowInfo`.
 *
 * The key is assembled into pRuntimeEnv->keyBuf and looked up in the result-row hash table.
 *  - Interval query, not the master scan: no new row is created; the existing row, or
 *    NULL when there is none, is returned as is.
 *  - Interval query, master scan: an existing row already listed in `pResultRowInfo`
 *    only becomes the current row; otherwise the row is appended (and created first
 *    when it is not in the hash table).
 *  - Other queries (group by column): an existing row is returned directly, a missing
 *    one is created and appended.
 *
 * Errors are reported through longjmp(pRuntimeEnv->env, ...): out of memory while
 * growing the row array or initialising a row, and too many rows
 * (more than MAX_INTERVAL_TIME_WINDOW).
 *
 * @return the selected result row, or NULL (non-master scan of an interval query only)
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  bool existed = false;
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);

  SResultRow **p1 =
      (SResultRow **)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));

  // in case of repeat scan/reverse scan, no new time window added.
  if (QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    if (!masterscan) {  // the *p1 may be NULL in case of sliding+offset exists.
      return (p1 != NULL)? *p1:NULL;
    }

    if (p1 != NULL) {
      // search backwards: the wanted row is most likely one of the latest windows
      for(int32_t i = pResultRowInfo->size - 1; i >= 0; --i) {
        if (pResultRowInfo->pResult[i] == (*p1)) {
          pResultRowInfo->curIndex = i;
          existed = true;
          break;
        }
      }
    }
  } else {
    if (p1 != NULL) {  // group by column query
      return *p1;
    }
  }

  if (!existed) {
    // TODO refactor
    // more than the capacity, reallocate the resources
    if (pResultRowInfo->size >= pResultRowInfo->capacity) {
      int64_t newCapacity = 0;
      if (pResultRowInfo->capacity > 10000) {
        newCapacity = (int64_t)(pResultRowInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pResultRowInfo->capacity * 1.5);
      }

      // For a capacity of 0 or 1 the multiplication above rounds back down to the old
      // capacity; without growth the store to pResult[size] below would be out of bounds.
      if (newCapacity <= pResultRowInfo->capacity) {
        newCapacity = (int64_t)pResultRowInfo->capacity + 1;
      }

      // keep the old array valid until realloc is known to have succeeded
      char *t = realloc(pResultRowInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pResultRowInfo->pResult = (SResultRow **)t;

      // zero the newly added slots
      int32_t inc = (int32_t)newCapacity - pResultRowInfo->capacity;
      memset(&pResultRowInfo->pResult[pResultRowInfo->capacity], 0, POINTER_BYTES * inc);

      pResultRowInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = NULL;

    if (p1 == NULL) {
      pResult = getNewResultRow(pRuntimeEnv->pool);
      int32_t ret = initResultRow(pResult);
      if (ret != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // add a new result set for a new group
      taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pResult, POINTER_BYTES);
    } else {
      pResult = *p1;
    }

    pResultRowInfo->pResult[pResultRowInfo->size] = pResult;
    pResultRowInfo->curIndex = pResultRowInfo->size++;
  }

  // too many time window in query
  if (pResultRowInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pResultRowInfo, pResultRowInfo->curIndex);
}

// get the correct time window according to the handled timestamp
H
Haojun Liao 已提交
546
static STimeWindow getActiveTimeWindow(SResultRowInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
547
  STimeWindow w = {0};
548

549
 if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
550
    w.skey = pWindowResInfo->prevSKey;
551 552
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision);
553
    } else {
554
      w.ekey = w.skey + pQuery->interval.interval - 1;
555
    }
556
  } else {
557
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
558
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
559
    w = pWindowRes->win;
560
  }
561

562
  if (w.skey > ts || w.ekey < ts) {
563 564 565
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
566 567
    } else {
      int64_t st = w.skey;
568

569
      if (st > ts) {
570
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
571
      }
572

573
      int64_t et = st + pQuery->interval.interval - 1;
574
      if (et < ts) {
575
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
576
      }
577

578
      w.skey = st;
579
      w.ekey = w.skey + pQuery->interval.interval - 1;
580
    }
581
  }
582

583 584 585 586 587 588 589
  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }
590

591 592 593
  return w;
}

H
Haojun Liao 已提交
594
/**
 * Assign a (page, row) slot in the disk-based result buffer to a result row that
 * has none yet (pageId == -1).
 *
 * The last page of group `tid` is reused while it holds fewer than
 * `numOfRowsPerPage` rows; otherwise it is released and a new page is requested.
 *
 * @return 0 on success or when the row already owns a slot, -1 when no page could be obtained
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  // in the first scan, new space is needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pageList = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(pageList) == 0) {
    pPage = getNewDataBuf(pResultBuf, tid, &pageId);
  } else {
    SPageInfo* pLast = getLastPageInfo(pageList);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release the current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);
      pPage = getNewDataBuf(pResultBuf, tid, &pageId);

      // the number of elements must be 0 for a newly allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  }

  if (pPage == NULL) {
    return -1;
  }

  // pWindowRes->pageId is still -1 here (checked on entry): take the next row of the page
  pWindowRes->pageId = pageId;
  pWindowRes->rowId = (int32_t)(pPage->num++);
  assert(pWindowRes->pageId >= 0);

  return 0;
}

638 639
/**
 * Select the result row of time window `win` (keyed by win->skey and groupId), give it
 * a result-buffer slot if it has none, and bind the function contexts to it.
 *
 * @param pResult  out: the selected row, or NULL when no row exists for this window
 * @return TSDB_CODE_SUCCESS (also when *pResult is NULL), or -1 when no result buffer could be assigned
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, STimeWindow *win,
    bool masterscan, SResultRow** pResult, int64_t groupId) {
  assert(win->skey <= win->ekey);

  SResultRow *pRow = doPrepareResultRowFromKey(pRuntimeEnv, pResultRowInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, groupId);
  if (pRow == NULL) {
    *pResult = NULL;
    return TSDB_CODE_SUCCESS;
  }

  // no result buffer assigned yet, add a new one
  if (pRow->pageId == -1) {
    SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;
    if (addNewWindowResultBuf(pRow, pBuf, (int32_t) groupId, pRuntimeEnv->numOfRowsPerPage) != TSDB_CODE_SUCCESS) {
      return -1;
    }
  }

  // set time window for current result
  pRow->win = (*win);
  *pResult = pRow;
  setResultRowOutputBufInitCtx(pRuntimeEnv, pRow);

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
665
// Returns the `closed` flag of the result row stored at `slot` (slot must be within [0, size)).
static bool getResultRowStatus(SResultRowInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SResultRow *pRow = pWindowResInfo->pResult[slot];
  return pRow->closed;
}

670 671 672 673 674 675 676 677 678 679 680 681 682 683
// Selects which interpolation flag of a result row is addressed: startInterp or endInterp.
typedef enum SResultTsInterpType {
  RESULT_ROW_START_INTERP = 1,
  RESULT_ROW_END_INTERP,  // == 2
} SResultTsInterpType;

// Set the start or end interpolation flag of `pResult`, as selected by `type`.
static void setResultRowInterpo(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  switch (type) {
    case RESULT_ROW_START_INTERP:
      pResult->startInterp = true;
      break;
    default:
      pResult->endInterp = true;
      break;
  }
}

H
Haojun Liao 已提交
684
// Query the start or end interpolation flag of `pResult`, as selected by `type`.
static bool resultRowInterpolated(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  return (type == RESULT_ROW_START_INTERP) ? (pResult->startInterp == true) : (pResult->endInterp == true);
}

H
Haojun Liao 已提交
693
/**
 * Count the rows, starting at `pos` and moving in scan direction, that are
 * covered up to and including `ekey`.
 *
 * Ascending order searches pData[pos .. numOfRows-1]; descending order searches
 * pData[0 .. pos]. The row found by `searchFn` is counted only when its
 * timestamp equals `ekey`.
 *
 * @return number of rows to step over; asserted to be positive
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  const bool asc = (order == TSDB_ORDER_ASC);

  int32_t hit = asc ? searchFn((char*) &pData[pos], numOfRows - pos, ekey, order)
                    : searchFn((char *)pData, pos + 1, ekey, order);

  int32_t steps = 0;
  if (hit >= 0) {
    // `hit` is relative to the searched range; translate it to a row of pData
    int32_t row = asc ? (hit + pos) : hit;
    steps = asc ? hit : (pos - hit);

    if (pData[row] == ekey) {
      steps += 1;
    }
  }

  assert(steps > 0);
  return steps;
}

721
/**
 * Close the result rows that are completely covered by `lastKey` and update
 * curIndex (and prevSKey) of `pResultRowInfo`.
 *
 * Rows are visited from the newest towards the oldest until an already closed row
 * is met. A row is closed when its window lies entirely on the processed side of
 * `lastKey` (ekey <= lastKey for ascending, skey >= lastKey for descending scans).
 */
static void doUpdateResultRowIndex(SResultRowInfo*pResultRowInfo, TSKEY lastKey, bool ascQuery) {
  int64_t openSKey = TSKEY_INITIAL_VAL;

  int32_t idx = pResultRowInfo->size - 1;
  while (idx >= 0) {
    SResultRow *pRow = pResultRowInfo->pResult[idx];
    if (pRow->closed) {
      break;
    }

    // newly closed result rows
    bool finished = ascQuery ? (pRow->win.ekey <= lastKey) : (pRow->win.skey >= lastKey);
    if (finished) {
      closeResultRow(pResultRowInfo, idx);
    } else {
      openSKey = pRow->win.skey;
    }

    --idx;
  }

  // all result rows are closed, set the last one to be the current row
  if (openSKey == TSKEY_INITIAL_VAL) {
    pResultRowInfo->curIndex = pResultRowInfo->size - 1;
    return;
  }

  // locate the newest closed row; the row after it is the current, not yet closed one
  idx = pResultRowInfo->size - 1;
  while (idx >= 0 && !pResultRowInfo->pResult[idx]->closed) {
    --idx;
  }

  pResultRowInfo->curIndex = idx + 1;
  pResultRowInfo->prevSKey = pResultRowInfo->pResult[pResultRowInfo->curIndex]->win.skey;
}
754

755
/**
 * Update the open/closed state of the result rows after a table was scanned up to
 * pTableQueryInfo->lastKey.
 *
 * When lastKey has moved beyond the table's window end (in scan direction) every row
 * is closed; otherwise only the rows covered by the last processed key are closed.
 */
static void updateResultRowIndex(SResultRowInfo* pResultRowInfo, STableQueryInfo* pTableQueryInfo, bool ascQuery) {
  bool pastWindowEnd = ascQuery ? (pTableQueryInfo->lastKey > pTableQueryInfo->win.ekey)
                                : (pTableQueryInfo->lastKey < pTableQueryInfo->win.ekey);
  if (pastWindowEnd) {
    closeAllResultRows(pResultRowInfo);
    pResultRowInfo->curIndex = pResultRowInfo->size - 1;
    return;
  }

  // lastKey minus one step in scan direction is passed on as the last processed key
  int32_t step = ascQuery? 1:-1;
  doUpdateResultRowIndex(pResultRowInfo, pTableQueryInfo->lastKey - step, ascQuery);
}

/**
 * Number of rows of the data block, starting at `startPos` and moving in scan
 * direction, that belong to the time window ending at `ekey`.
 *
 * When the window ends inside the block the count comes from a search on the
 * timestamp column; otherwise all remaining rows of the block are counted.
 *
 * @param updateLastKey  when true, pQuery->current->lastKey is set to the key of the
 *                       last counted row (or the block border) plus one step
 * @return number of rows; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* pTable = pQuery->current;

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);
  const bool endsInsideBlock = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  int32_t num = -1;
  if (endsInsideBlock) {
    num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    if (updateLastKey) {  // update the last key
      int32_t lastRow = asc ? (startPos + (num - 1)) : (startPos - (num - 1));
      pTable->lastKey = pPrimaryColumn[lastRow] + step;
    }
  } else {
    // the window reaches the block border: every remaining row is included
    num = asc ? (pDataBlockInfo->rows - startPos) : (startPos + 1);
    if (updateLastKey) {
      TSKEY border = asc ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
      pTable->lastKey = border + step;
    }
  }

  assert(num > 0);
  return num;
}

805 806
/**
 * Apply every output function of the query to a run of rows of the current block.
 *
 * @param pWin         time window being processed; its skey becomes nStartQueryTimestamp
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsCol        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 *
 * The pre-aggregated block statistics (preAggVals) of a context are only valid when
 * the whole block is processed, so they are switched off for a partial run and
 * restored afterwards. Each context's own flag is saved and restored: the flag can
 * differ between contexts, and restoring all of them from pCtx[0] could enable
 * statistics on a context that has none.
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pWin, int32_t offset, int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    // remember this context's own statistics flag
    bool hasPrev = pCtx[k].preAggVals.isSet;

    pCtx[k].nStartQueryTimestamp = pWin->skey;
    pCtx[k].size = forwardStep;
    // for descending scans `offset` is the last row of the run, so the run starts forwardStep - 1 rows earlier
    pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

    int32_t functionId = pQuery->pExpr1[k].base.functionId;
    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCtx[k].ptsList = &tsCol[pCtx[k].startOffset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (hasPrev && forwardStep < numOfTotal) {
      pCtx[k].preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
      aAggs[functionId].xFunction(&pCtx[k]);
    }

    // restore it
    pCtx[k].preAggVals.isSet = hasPrev;
  }
}

836 837
/*
 * Apply every output function to the single row at `offset` of the current data block.
 * The start key of pWin is stored as nStartQueryTimestamp of each context before the
 * decision whether the function has to run is taken.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pWin, int32_t offset) {
  SQLFunctionCtx *ctxList = pRuntimeEnv->pCtx;
  SQuery         *query   = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < query->numOfOutput; ++idx) {
    SQLFunctionCtx *ctx = &ctxList[idx];
    ctx->nStartQueryTimestamp = pWin->skey;

    const int32_t funcId = query->pExpr1[idx].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, ctx, funcId)) {
      continue;
    }

    aAggs[funcId].xFunctionF(ctx, offset);
  }
}

H
Haojun Liao 已提交
850 851
/*
 * Advance *pNext to the following time window and find the row of the current block at
 * which that window starts.
 *
 * prevPosition is the index of the last row used by the previous window, or -1 if unknown.
 *
 * Returns the start row index, or -1 when the next window lies completely beyond the block
 * in scan direction. If the row found at the returned index is already past the window,
 * *pNext is moved on so that it covers this row: by a multiple of the sliding value, or by
 * re-truncating the row timestamp when the interval unit is 'n' or 'y'.
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  getNextTimeWindow(pQuery, pNext);

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  if (asc) {
    if (pNext->skey > pDataBlockInfo->window.ekey) {
      return -1;
    }
  } else {
    if (pNext->ekey < pDataBlockInfo->window.skey) {
      return -1;
    }
  }

  // key to look for: the leading edge of the window, clamped to the start of the query range
  TSKEY startKey = asc ? pNext->skey : pNext->ekey;
  if ((asc && startKey < pQuery->window.skey) || (!asc && startKey > pQuery->window.skey)) {
    startKey = pQuery->window.skey;
  }

  int32_t startPos = 0;

  if (pQuery->interval.sliding == pQuery->interval.interval && prevPosition != -1) {
    // tumbling time window query, a special case of sliding time window query:
    // the window starts right behind the last row of the previous one
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (asc && startKey <= pDataBlockInfo->window.skey) {
    startPos = 0;
  } else if (!asc && startKey >= pDataBlockInfo->window.ekey) {
    startPos = pDataBlockInfo->rows - 1;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  // without a timestamp column the window cannot be adjusted, only sanity-checked
  if (primaryKeys == NULL) {
    if (asc) {
      assert(pDataBlockInfo->window.skey <= pNext->ekey);
    } else {
      assert(pDataBlockInfo->window.ekey >= pNext->skey);
    }

    return startPos;
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  const TSKEY rowTs = primaryKeys[startPos];
  const bool  rowBeyondWindow = asc ? (rowTs > pNext->ekey) : (rowTs < pNext->skey);
  if (!rowBeyondWindow) {
    return startPos;
  }

  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    pNext->skey = taosTimeTruncate(rowTs, &pQuery->interval, pQuery->precision);
    pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
  } else if (asc) {
    pNext->ekey += ((rowTs - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
  } else {
    pNext->skey -= ((pNext->skey - rowTs + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
    pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
926
/*
 * Return the key at which processing of pWindow stops: the trailing edge of the window in
 * scan direction (ekey when ascending, skey when descending), limited so that it does not
 * run past pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
943 944
/*
 * Find the column with id `colId` in the data block and return its data buffer.
 * Returns NULL when no column of the block carries that id.
 * The lookup is a linear scan; todo: binary search.
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  const int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
958
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
959 960 961
  if (pDataBlock == NULL) {
    return NULL;
  }
962

H
Haojun Liao 已提交
963
  char *dataBlock = NULL;
H
Haojun Liao 已提交
964
  SQuery *pQuery = pRuntimeEnv->pQuery;
965

H
Haojun Liao 已提交
966
  int32_t functionId = pQuery->pExpr1[col].base.functionId;
967
  if (functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
968
    sas->pArithExpr = &pQuery->pExpr1[col];
969

H
Haojun Liao 已提交
970
    sas->offset    = (QUERY_IS_ASC_QUERY(pQuery))? pQuery->pos : pQuery->pos - (size - 1);
H
Haojun Liao 已提交
971
    sas->colList   = pQuery->colList;
972
    sas->numOfCols = pQuery->numOfCols;
H
Haojun Liao 已提交
973
    sas->data      = calloc(pQuery->numOfCols, POINTER_BYTES);
974

H
Haojun Liao 已提交
975 976 977 978
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

979
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
980
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
981
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
982
      SColumnInfo *pColMsg = &pQuery->colList[i];
983

984 985 986 987 988 989 990 991
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
992

993
      assert(dataBlock != NULL);
994
      sas->data[i] = dataBlock;  // start from the offset
995
    }
996

997
  } else {  // other type of query function
H
Haojun Liao 已提交
998
    SColIndex *pCol = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
999
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
1000
      SColIndex* pColIndex = &pQuery->pExpr1[col].base.colInfo;
H
Haojun Liao 已提交
1001 1002 1003 1004
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
1005 1006
    } else {
      dataBlock = NULL;
1007 1008
    }
  }
1009

1010 1011 1012
  return dataBlock;
}

H
Haojun Liao 已提交
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024
/*
 * Mark one window border as "not interpolated" in all `numOfOutput` contexts by setting its
 * key to INT64_MIN: the start border when type is RESULT_ROW_START_INTERP, the end border
 * for any other type value.
 */
static void setNotInterpoWindowKey(SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t type) {
  const bool startBorder = (type == RESULT_ROW_START_INTERP);

  for (int32_t idx = 0; idx < numOfOutput; ++idx) {
    if (startBorder) {
      pCtx[idx].start.key = INT64_MIN;
    } else {
      pCtx[idx].end.key = INT64_MIN;
    }
  }
}

1025
// window start key interpolation
H
Haojun Liao 已提交
1026
static bool setTimeWindowInterpolationStartTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t pos, int32_t numOfRows, SArray* pDataBlock, TSKEY* tsCols, STimeWindow* win) {
1027 1028
  SQuery* pQuery = pRuntimeEnv->pQuery;

H
Haojun Liao 已提交
1029
  TSKEY curTs  = tsCols[pos];
1030 1031
  TSKEY lastTs = *(TSKEY *) pRuntimeEnv->prevRow[0];

H
Haojun Liao 已提交
1032 1033 1034 1035
  // lastTs == INT64_MIN and pos == 0 means this is the first time window, interpolation is not needed.
  // start exactly from this point, no need to do interpolation
  TSKEY key = QUERY_IS_ASC_QUERY(pQuery)? win->skey:win->ekey;
  if (key == curTs) {
H
Haojun Liao 已提交
1036
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
1037
    return true;
H
Haojun Liao 已提交
1038
  }
1039

H
Haojun Liao 已提交
1040
  if (lastTs == INT64_MIN && ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))) {
H
Haojun Liao 已提交
1041
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
H
Haojun Liao 已提交
1042
    return true;
1043 1044
  }

H
Haojun Liao 已提交
1045 1046 1047
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  TSKEY   prevTs = ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))?
      lastTs:tsCols[pos - step];
1048

H
Haojun Liao 已提交
1049 1050 1051
  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, pos - step, curTs, pos, key, RESULT_ROW_START_INTERP);
  return true;
}
1052

H
Haojun Liao 已提交
1053 1054 1055
/*
 * Window end key interpolation.
 *
 * endRowIndex is the last row of the window inside the block, blockEkey the last key of the
 * block in scan direction.
 *
 * Returns false when the window reaches beyond the block, so its end border cannot be
 * settled yet. Returns true otherwise: either a row sits exactly on the border key (no
 * interpolation), or the border value was interpolated between the last row of the window
 * and the row following it.
 */
static bool setTimeWindowInterpolationEndTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t endRowIndex, SArray* pDataBlock, TSKEY* tsCols, TSKEY blockEkey, STimeWindow* win) {
  SQuery*     pQuery = pRuntimeEnv->pQuery;
  const TSKEY lastRowTs = tsCols[endRowIndex];
  const bool  asc = QUERY_IS_ASC_QUERY(pQuery);

  const TSKEY borderKey = asc ? win->ekey : win->skey;

  // not ended in current data block, do not invoke interpolation
  const bool continuesInNextBlock = asc ? (borderKey > blockEkey) : (borderKey < blockEkey);
  if (continuesInNextBlock) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
    return false;
  }

  // there is actual end point of current time window, no interpolation need
  if (borderKey == lastRowTs) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
    return true;
  }

  // interpolate between the last row of the window and its successor in scan direction
  const int32_t followingRow = endRowIndex + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  assert(followingRow >= 0);

  const TSKEY followingTs = tsCols[followingRow];
  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, lastRowTs, endRowIndex, followingTs, followingRow, borderKey, RESULT_ROW_END_INTERP);
  return true;
}

/*
 * Copy the last row of the block in scan direction (last row when ascending, row 0 when
 * descending) into pRuntimeEnv->prevRow, one value per query column. Does nothing when no
 * block data is available.
 */
static void saveDataBlockLastRow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray* pDataBlock) {
  if (pDataBlock == NULL) {
    return;
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t rowIndex = 0;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    rowIndex = pDataBlockInfo->rows - 1;
  }

  for (int32_t colIdx = 0; colIdx < pQuery->numOfCols; ++colIdx) {
    SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, colIdx);
    char            *src = ((char*)pColInfo->pData) + (pColInfo->info.bytes * rowIndex);

    memcpy(pRuntimeEnv->prevRow[colIdx], src, pColInfo->info.bytes);
  }
}

/*
 * Return the timestamp at which processing of the block starts: the timestamp of the row
 * at the current query position when a timestamp column is available, otherwise the leading
 * edge of the block's time range in scan direction.
 */
static TSKEY getStartTsKey(SQuery* pQuery, SDataBlockInfo* pDataBlockInfo, TSKEY* tsCols, int32_t step) {
  if (tsCols != NULL) {
    int32_t rowIdx = GET_COL_DATA_POS(pQuery, 0, step);
    return tsCols[rowIdx];
  }

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return pDataBlockInfo->window.skey;
  }

  return pDataBlockInfo->window.ekey;
}

H
Haojun Liao 已提交
1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
/*
 * Settle both borders of time window `win` for the rows [startPos, startPos + forwardStep)
 * (counted in scan direction) of the current block. Does nothing unless
 * pRuntimeEnv->timeWindowInterpo is set.
 *
 * A border already flagged as interpolated in pResult is only reset in the function
 * contexts; otherwise the border is prepared and, when the helper reports success,
 * flagged in pResult.
 */
static void doWindowBorderInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray *pDataBlock,
    SResultRow* pResult, STimeWindow* win, int32_t startPos, int32_t forwardStep) {
  if (!pRuntimeEnv->timeWindowInterpo) {
    return;
  }

  assert(pDataBlock != NULL);

  SQuery* pQuery = pRuntimeEnv->pQuery;

  // the first column of the block is used as the timestamp column
  SColumnInfoData *pTsColumn = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = (TSKEY *)(pTsColumn->pData);

  // start border
  if (resultRowInterpolated(pResult, RESULT_ROW_START_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
  } else {
    if (setTimeWindowInterpolationStartTs(pRuntimeEnv, startPos, pDataBlockInfo->rows, pDataBlock, tsCols, win)) {
      setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
    }
  }

  // end border
  if (resultRowInterpolated(pResult, RESULT_ROW_END_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
  } else {
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    int32_t endRowIndex = startPos + (forwardStep - 1) * step;

    // last key of the block in scan direction
    TSKEY blockEndKey = pDataBlockInfo->window.skey;
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      blockEndKey = pDataBlockInfo->window.ekey;
    }

    if (setTimeWindowInterpolationEndTs(pRuntimeEnv, endRowIndex, pDataBlock, tsCols, blockEndKey, win)) {
      setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
    }
  }
}

1145
/**
H
Haojun Liao 已提交
1146
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
1147 1148
 * @param pRuntimeEnv
 * @param forwardStep
1149
 * @param tsCols
1150 1151 1152 1153 1154
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
H
Haojun Liao 已提交
1155
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
H
Haojun Liao 已提交
1156
                                    SResultRowInfo *pWindowResInfo, __block_search_fn_t searchFn, SArray *pDataBlock) {
1157
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1158
  bool            masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1159

1160 1161 1162
  SQuery *pQuery  = pRuntimeEnv->pQuery;
  int64_t groupId = pQuery->current->groupIndex;

1163
  TSKEY  *tsCols = NULL;
1164
  if (pDataBlock != NULL) {
1165
    SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, 0);
1166
    tsCols = (TSKEY *)(pColInfo->pData);
1167
  }
1168

1169
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1170 1171 1172
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1173

1174
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1175
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1176
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1177
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
1178
  }
1179

1180
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1181
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1182 1183
    int32_t prevIndex = curTimeWindowIndex(pWindowResInfo);

1184
    TSKEY ts = getStartTsKey(pQuery, pDataBlockInfo, tsCols, step);
H
Haojun Liao 已提交
1185
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1186

1187
    SResultRow* pResult = NULL;
1188 1189 1190
    int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
    if (ret != TSDB_CODE_SUCCESS || pResult == NULL) {
      goto _end;
1191
    }
1192

H
Haojun Liao 已提交
1193 1194 1195
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1196 1197
    TSKEY ekey = reviseWindowEkey(pQuery, &win);
    forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
H
Haojun Liao 已提交
1198

1199 1200 1201
    // prev time window not interpolation yet.
    int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
    if (prevIndex != -1 && prevIndex < curIndex && pRuntimeEnv->timeWindowInterpo) {
1202
      for(int32_t j = prevIndex; j < curIndex; ++j) { // previous time window may be all closed already.
1203
        SResultRow *pRes = pWindowResInfo->pResult[j];
1204 1205 1206 1207
        if (pRes->closed) {
          assert(resultRowInterpolated(pRes, RESULT_ROW_START_INTERP) && resultRowInterpolated(pRes, RESULT_ROW_END_INTERP));
          continue;
        }
H
Haojun Liao 已提交
1208

1209 1210 1211
        STimeWindow w = pRes->win;
        ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &w, masterScan, &pResult, groupId);
        assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
H
Haojun Liao 已提交
1212

1213 1214 1215 1216
        int32_t p = QUERY_IS_ASC_QUERY(pQuery)? 0:pDataBlockInfo->rows-1;
        doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, *(TSKEY*) pRuntimeEnv->prevRow[0], -1,  tsCols[0], p, w.ekey, RESULT_ROW_END_INTERP);
        setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
        setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
H
Haojun Liao 已提交
1217

1218
        doBlockwiseApplyFunctions(pRuntimeEnv, &w, startPos, 0, tsCols, pDataBlockInfo->rows);
H
Haojun Liao 已提交
1219 1220
      }

1221 1222 1223
      // restore current time window
      ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
      assert (ret == TSDB_CODE_SUCCESS);
1224
    }
1225

1226 1227 1228
    // window start key interpolation
    doWindowBorderInterpolation(pRuntimeEnv, pDataBlockInfo, pDataBlock, pResult, &win, pQuery->pos, forwardStep);
    doBlockwiseApplyFunctions(pRuntimeEnv, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1229

1230
    STimeWindow nextWin = win;
1231
    while (1) {
H
Haojun Liao 已提交
1232 1233
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1234 1235 1236
      if (startPos < 0) {
        break;
      }
1237

1238
      // null data, failed to allocate more memory buffer
1239 1240
      int32_t code = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &nextWin, masterScan, &pResult, groupId);
      if (code != TSDB_CODE_SUCCESS || pResult == NULL) {
1241 1242
        break;
      }
1243

1244
      ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1245
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1246

1247
      // window start(end) key interpolation
H
Haojun Liao 已提交
1248
      doWindowBorderInterpolation(pRuntimeEnv, pDataBlockInfo, pDataBlock, pResult, &nextWin, startPos, forwardStep);
1249
      doBlockwiseApplyFunctions(pRuntimeEnv, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1250
    }
1251

1252 1253 1254 1255 1256 1257
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1258
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1259
      int32_t functionId = pQuery->pExpr1[k].base.functionId;
1260
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
1261
        pCtx[k].nStartQueryTimestamp = pDataBlockInfo->window.skey;
1262 1263 1264 1265
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1266

1267
  _end:
1268 1269 1270 1271 1272
  if (pRuntimeEnv->timeWindowInterpo) {
    saveDataBlockLastRow(pRuntimeEnv, pDataBlockInfo, pDataBlock);
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1273
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1274 1275
      continue;
    }
1276

S
TD-1848  
Shengliang Guan 已提交
1277
    tfree(sasArray[i].data);
1278
  }
1279

S
TD-1848  
Shengliang Guan 已提交
1280
  tfree(sasArray);
1281 1282
}

1283
/*
 * Select the result row that belongs to one group-by column value and make it the current
 * output buffer of all function contexts.
 *
 * pData      - the group-by value of the current row
 * type/bytes - data type and width of the group-by column
 * groupIndex - group the result row is created in
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when the value is null or no result buffer page could be
 * added for a new result row.
 *
 * Leaves through longjmp with TSDB_CODE_QRY_APP_ERROR for float/double columns and with
 * TSDB_CODE_QRY_OUT_OF_MEMORY when the copy of a binary/nchar key cannot be allocated.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  if (isNull(pData, type)) {  // ignore the null value
    return -1;
  }

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // not assign result buffer yet, add new result buffer, TODO remove it
  // key of the result row: the raw value, or the payload of a variable-length value
  char* d = pData;
  int16_t len = bytes;
  if (type == TSDB_DATA_TYPE_BINARY||type == TSDB_DATA_TYPE_NCHAR) {
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);

    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
  }

  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, groupIndex);
  assert (pResultRow != NULL);

  int64_t v = -1;
  GET_TYPED_DATA(v, int64_t, type, pData);
  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    if (pResultRow->key == NULL) {
      // keep a private copy of the variable-length value, including its length header
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {
        // copying into a NULL buffer would crash; report it like the other allocations in this file
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      varDataCopy(pResultRow->key, pData);
    } else {
      assert(memcmp(pResultRow->key, pData, varDataTLen(pData)) == 0);
    }
  } else {
    // fixed-width values are stored in the window keys of the result row
    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  // a new result row has no buffer page yet
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1332
/*
 * Return the data of the first non-tag group-by column that is present in the data block,
 * or NULL when none of them is.
 *
 * *type and *bytes receive the type and width of each non-tag group-by column that is
 * examined, taken from the query column list; on a successful return they describe the
 * returned column.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_TAG(pGroupCol->flag)) {
      continue;
    }

    // position of the column in the query column list
    const int32_t wantedId = pGroupCol->colId;
    int16_t       listPos = -1;

    int32_t c = 0;
    while (c < pQuery->numOfCols) {
      if (pQuery->colList[c].colId == wantedId) {
        listPos = (int16_t)c;
        break;
      }

      ++c;
    }

    assert(listPos >= 0 && listPos < pQuery->numOfCols);

    *type = pQuery->colList[listPos].type;
    *bytes = pQuery->colList[listPos].bytes;

    /*
     * the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced: the block is searched by column id.
     */
    const int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

    int32_t b = 0;
    while (b < numOfBlockCols) {
      SColumnInfoData *pBlockCol = taosArrayGet(pDataBlock, b);
      if (pGroupCol->colId == pBlockCol->info.colId) {
        return pBlockCol->pData;
      }

      ++b;
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the input buffer of the first function context with the
 * current element of the timestamp buffer (pRuntimeEnv->pTsBuf).
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tags differ, TS_JOIN_TS_NOT_EQUALS when the row
 * has not reached the element's timestamp yet in scan direction, and TS_JOIN_TS_EQUAL
 * otherwise. A row that is already past the element's timestamp is treated as a broken
 * invariant (assert).
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTsBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  // timestamp of the row under inspection
  char *pRowTs = (char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset;
  TSKEY key = *(TSKEY *)pRowTs;

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTsBuf->tsOrder,
         pRuntimeEnv->pTsBuf->cur.order, pRuntimeEnv->pTsBuf->cur.tsIndex);
#endif

  if (key != elem.ts) {
    const bool notReachedYet = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
    if (notReachedYet) {
      return TS_JOIN_TS_NOT_EQUALS;
    }

    // the row is already beyond the element of the timestamp buffer
    assert(false);
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function `functionId` has to be invoked for the context pCtx in the
 * current scan. The rules are checked in this order:
 *  1. TSDB_FUNC_TS always runs;
 *  2. a completed result, TSDB_FUNC_TAG_DUMMY and TSDB_FUNC_TS_DUMMY never run;
 *  3. first/first_dst run only in an ascending query;
 *  4. last/last_dst run only when param[0] of the context equals the query order;
 *  5. everything else runs unless this is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pCellInfo = GET_RES_INFO(pCtx);
  SQuery*             pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  if (pCellInfo->complete) {
    return false;
  }

  if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  const bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // denote the order type
  const bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, only the functions handled above need to be executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

H
Haojun Liao 已提交
1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455
/*
 * Compute the value at a window border by linear interpolation between two rows, for every
 * TSDB_FUNC_TWA output column.
 *
 * prevTs/prevRowIndex - the earlier point; prevRowIndex == -1 takes its value from
 *                       pRuntimeEnv->prevRow, any other index from the data block
 * curTs/curRowIndex   - the later point, always taken from the data block
 * windowKey           - the border timestamp; must differ from curTs
 * type                - RESULT_ROW_START_INTERP stores the result in the start point of the
 *                       context, any other value in its end point
 *
 * For all other functions the start key of the context is reset to INT64_MIN.
 * NOTE(review): that reset touches start.key even when `type` denotes the end border - confirm.
 */
void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey,  int32_t type) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx* pOneCtx = &pRuntimeEnv->pCtx[idx];

    if (pQuery->pExpr1[idx].base.functionId != TSDB_FUNC_TWA) {
      pOneCtx->start.key = INT64_MIN;
      continue;
    }

    SColIndex*       pColIndex = &pQuery->pExpr1[idx].base.colInfo;
    int16_t          index = pColIndex->colIndex;
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, index);

    assert(pColInfo->info.colId == pColIndex->colId && curTs != windowKey);

    // where the value of the earlier point lives
    char* pPrevVal = NULL;
    if (prevRowIndex == -1) {
      pPrevVal = (char *)pRuntimeEnv->prevRow[index];
    } else {
      pPrevVal = (char *)pColInfo->pData + prevRowIndex * pColInfo->info.bytes;
    }

    char* pCurVal = (char *)pColInfo->pData + curRowIndex * pColInfo->info.bytes;

    // both values are converted to double before interpolating
    double v1 = 0, v2 = 0, v = 0;
    GET_TYPED_DATA(v1, double, pColInfo->info.type, pPrevVal);
    GET_TYPED_DATA(v2, double, pColInfo->info.type, pCurVal);

    SPoint point1 = (SPoint){.key = prevTs, .val = &v1};
    SPoint point2 = (SPoint){.key = curTs, .val = &v2};
    SPoint point  = (SPoint){.key = windowKey, .val = &v};
    taosGetLinearInterpolationVal(TSDB_DATA_TYPE_DOUBLE, &point1, &point2, &point);

    if (type == RESULT_ROW_START_INTERP) {
      pOneCtx->start.key = point.key;
      pOneCtx->start.val = v;
    } else {
      pOneCtx->end.key = point.key;
      pOneCtx->end.val = v;
    }
  }
}

H
Haojun Liao 已提交
1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514
/*
 * Row-wise counterpart of the window start border handling.
 *
 * If the start border of pResult is already flagged as interpolated, the start key of all
 * contexts is simply reset. Otherwise:
 *  - the row (timestamp `ts`, row index `offset`) sits on the border: flag the border;
 *  - a previous row exists (prevTs != INT64_MIN) on the other side of the border:
 *    interpolate between both rows and flag the border;
 *  - else: reset the start key of all contexts.
 * In these three cases the end key of all contexts is reset and their size is set to 1.
 */
static void setTimeWindowSKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (resultRowInterpolated(pResult, RESULT_ROW_START_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    return;
  }

  const bool  asc = QUERY_IS_ASC_QUERY(pQuery);
  const TSKEY key = asc ? win->skey : win->ekey;

  if (key == ts) {
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  } else {
    // the previous row must lie strictly before the border in scan direction
    const bool prevBeforeBorder = asc ? (prevTs < key) : (prevTs > key);

    if (prevTs != INT64_MIN && prevBeforeBorder) {
      doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, key, RESULT_ROW_START_INTERP);
      setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
    } else {
      setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    }
  }

  setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    pRuntimeEnv->pCtx[idx].size = 1;
  }
}

/*
 * Row-wise counterpart of the window end border handling: interpolate the end border of
 * `win` between the previous row and the row (timestamp `ts`, row index `offset`), flag the
 * end border of pResult as interpolated, reset the start key of all contexts and set their
 * size to 0.
 */
static void setTimeWindowEKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // trailing edge of the window in scan direction
  TSKEY borderKey = win->skey;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    borderKey = win->ekey;
  }

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, borderKey, RESULT_ROW_END_INTERP);
  setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);

  setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    pRuntimeEnv->pCtx[idx].size = 0;
  }
}

1515
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
H
Haojun Liao 已提交
1516
    SResultRowInfo *pWindowResInfo, SArray *pDataBlock) {
1517
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1518
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1519

1520
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1521
  STableQueryInfo* item = pQuery->current;
H
Haojun Liao 已提交
1522

1523 1524
  int64_t groupId = item->groupIndex;

H
Haojun Liao 已提交
1525 1526 1527
  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
H
Haojun Liao 已提交
1528 1529
  bool    groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

1530
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
1531 1532 1533
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
1534

1535 1536
  int16_t type = 0;
  int16_t bytes = 0;
1537

1538
  char *groupbyColumnData = NULL;
H
Haojun Liao 已提交
1539
  if (groupbyColumnValue) {
1540
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
1541
  }
1542

H
Haojun Liao 已提交
1543
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1544
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
1545
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
H
Haojun Liao 已提交
1546
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId);
H
Haojun Liao 已提交
1547
    pCtx[k].size = 1;
1548
  }
1549

1550 1551
  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
1552
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
H
hjxilinx 已提交
1553 1554
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
1555
  }
1556

1557
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1558

1559 1560
  // from top to bottom in desc
  // from bottom to top in asc order
H
Haojun Liao 已提交
1561
  if (pRuntimeEnv->pTsBuf != NULL) {
1562
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
H
Haojun Liao 已提交
1563
           pQuery->order.order, pRuntimeEnv->pTsBuf->cur.order);
1564
  }
1565

H
hjxilinx 已提交
1566
  int32_t offset = -1;
H
Haojun Liao 已提交
1567
  TSKEY   prevTs = *(TSKEY*) pRuntimeEnv->prevRow[0];
H
Haojun Liao 已提交
1568
  int32_t prevRowIndex = -1;
1569

1570
  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
H
hjxilinx 已提交
1571
    offset = GET_COL_DATA_POS(pQuery, j, step);
1572

H
Haojun Liao 已提交
1573
    if (pRuntimeEnv->pTsBuf != NULL) {
1574 1575
      int32_t ret = doTSJoinFilter(pRuntimeEnv, offset);
      if (ret == TS_JOIN_TAG_NOT_EQUALS) {
1576
        break;
1577
      } else if (ret == TS_JOIN_TS_NOT_EQUALS) {
1578 1579
        continue;
      } else {
1580
        assert(ret == TS_JOIN_TS_EQUAL);
1581 1582
      }
    }
1583

1584
    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
1585 1586
      continue;
    }
1587

1588
    // interval window query, decide the time window according to the primary timestamp
H
Haojun Liao 已提交
1589
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1590
      int32_t prevWindowIndex = curTimeWindowIndex(pWindowResInfo);
H
Haojun Liao 已提交
1591
      int64_t ts  = tsCols[offset];
H
Haojun Liao 已提交
1592

1593
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1594

1595
      SResultRow* pResult = NULL;
1596 1597 1598
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
      if (ret != TSDB_CODE_SUCCESS || pResult == NULL) {  // null data, too many state code
        goto _end;
1599
      }
H
Haojun Liao 已提交
1600

1601 1602
      // window start key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1603 1604 1605 1606 1607
        // check for the time window end time interpolation
        int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
        if (prevWindowIndex != -1 && prevWindowIndex < curIndex) {
          for (int32_t k = prevWindowIndex; k < curIndex; ++k) {
            SResultRow *pRes = pWindowResInfo->pResult[k];
1608 1609 1610 1611
            if (pRes->closed) {
              assert(resultRowInterpolated(pResult, RESULT_ROW_START_INTERP) && resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
              continue;
            }
H
Haojun Liao 已提交
1612

1613
            ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &pRes->win, masterScan, &pResult, groupId);
H
Haojun Liao 已提交
1614
            assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
H
Haojun Liao 已提交
1615

H
Haojun Liao 已提交
1616
            setTimeWindowEKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &pRes->win);
1617
            doRowwiseApplyFunctions(pRuntimeEnv, &pRes->win, offset);
H
Haojun Liao 已提交
1618 1619 1620
          }

          // restore current time window
1621
          ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
H
Haojun Liao 已提交
1622 1623
          if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
            continue;
1624 1625
          }
        }
1626

H
Haojun Liao 已提交
1627
        setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &win);
1628
      }
H
Haojun Liao 已提交
1629

1630
      doRowwiseApplyFunctions(pRuntimeEnv, &win, offset);
1631

1632 1633
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;
1634

1635
      while (1) {
H
Haojun Liao 已提交
1636
        getNextTimeWindow(pQuery, &nextWin);
1637
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
H
Haojun Liao 已提交
1638
            (nextWin.ekey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
1639 1640
          break;
        }
1641

1642 1643 1644
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }
1645

1646
        // null data, failed to allocate more memory buffer
1647 1648
        int32_t code = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &nextWin, masterScan, &pResult, groupId);
        if (code != TSDB_CODE_SUCCESS || pResult == NULL) {
1649 1650
          break;
        }
1651

1652 1653
        setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &nextWin);
        doRowwiseApplyFunctions(pRuntimeEnv, &nextWin, offset);
1654
      }
1655

1656 1657 1658
      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
H
Haojun Liao 已提交
1659
      if (groupbyColumnValue) {
H
hjxilinx 已提交
1660
        char *val = groupbyColumnData + bytes * offset;
1661

1662
        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, item->groupIndex);
1663 1664 1665 1666
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }
1667

1668
      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1669
        int32_t functionId = pQuery->pExpr1[k].base.functionId;
1670 1671 1672 1673 1674
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }
1675

H
Haojun Liao 已提交
1676 1677
    prevTs = tsCols[offset];
    prevRowIndex = offset;
1678

H
Haojun Liao 已提交
1679
    if (pRuntimeEnv->pTsBuf != NULL) {
1680
      // if timestamp filter list is empty, quit current query
H
Haojun Liao 已提交
1681
      if (!tsBufNextPos(pRuntimeEnv->pTsBuf)) {
H
hjxilinx 已提交
1682
        setQueryStatus(pQuery, QUERY_COMPLETED);
1683 1684 1685 1686
        break;
      }
    }
  }
H
Haojun Liao 已提交
1687

1688
  _end:
H
Haojun Liao 已提交
1689 1690 1691 1692 1693 1694 1695
  assert(offset >= 0);
  if (tsCols != NULL) {
    item->lastKey = tsCols[offset] + step;
  } else {
    item->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step;
  }

H
Haojun Liao 已提交
1696 1697
  if (pRuntimeEnv->pTsBuf != NULL) {
    item->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
1698
  }
H
Haojun Liao 已提交
1699

1700 1701
  // todo refactor: extract method
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1702
    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) {
1703 1704
      continue;
    }
1705

S
TD-1848  
Shengliang Guan 已提交
1706
    tfree(sasArray[i].data);
1707
  }
1708

1709 1710 1711 1712
  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1713
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1714
  SQuery *pQuery = pRuntimeEnv->pQuery;
1715

1716 1717
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  SResultRowInfo*  pResultRowInfo = &pRuntimeEnv->windowResInfo;
1718

H
Haojun Liao 已提交
1719
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1720
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, pDataBlock);
1721
  } else {
1722
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, searchFn, pDataBlock);
1723
  }
1724

1725 1726 1727 1728
  // update the lastkey of current table for projection/aggregation query
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
  pTableQueryInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

1729
  // interval query with limit applied
1730
  int32_t numOfRes = 0;
1731 1732 1733
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    numOfRes = pResultRowInfo->size;
    updateResultRowIndex(pResultRowInfo, pTableQueryInfo, QUERY_IS_ASC_QUERY(pQuery));
H
Haojun Liao 已提交
1734
  } else { // projection query
1735
    numOfRes = (int32_t) getNumOfResult(pRuntimeEnv);
1736

1737 1738 1739 1740
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1741

1742 1743 1744
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1745

1746 1747 1748
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1749

1750 1751
      if (((pTableQueryInfo->lastKey > pTableQueryInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQueryInfo->lastKey < pTableQueryInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
1752 1753
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1754
    }
1755
  }
1756

1757
  return numOfRes;
1758 1759
}

H
Haojun Liao 已提交
1760
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1761
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1762

H
Haojun Liao 已提交
1763 1764
  int32_t functionId = pQuery->pExpr1[colIndex].base.functionId;
  int32_t colId = pQuery->pExpr1[colIndex].base.colInfo.colId;
1765

1766
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1767
  pCtx->hasNull = hasNullValue(&pQuery->pExpr1[colIndex].base.colInfo, pStatis, &tpField);
1768
  pCtx->aInputElemBuf = inputData;
1769

1770
  if (tpField != NULL) {
H
Haojun Liao 已提交
1771
    pCtx->preAggVals.isSet  = true;
1772 1773
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1774 1775 1776
  } else {
    pCtx->preAggVals.isSet = false;
  }
1777

H
Haojun Liao 已提交
1778 1779
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1780 1781
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1782

H
Haojun Liao 已提交
1783
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1784 1785
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1786

1787 1788
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1789
    pCtx->ptsList = &tsCol[pCtx->startOffset];
1790
  }
1791

1792 1793 1794 1795 1796
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1797
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1798
    /*
H
Haojun Liao 已提交
1799
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
H
Haojun Liao 已提交
1800
     * timestamp column, and the y-value is the column specified in pQuery->pExpr1[i].colIdxInBuffer
1801 1802 1803 1804 1805
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
1806 1807 1808 1809
       pCtx->param[1].i64Key = pQuery->window.skey;
       pCtx->param[1].nType = TSDB_DATA_TYPE_BIGINT;
       pCtx->param[2].i64Key = pQuery->window.ekey;
       pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
1810
    }
1811

1812 1813
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1814 1815 1816 1817 1818 1819
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1820
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1821 1822 1823
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1824
    pInterpInfo->type = (int8_t)pQuery->fillType;
1825 1826
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1827

1828 1829 1830 1831
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1832 1833 1834
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1835 1836
      }
    }
H
Haojun Liao 已提交
1837 1838 1839
  } else if (functionId == TSDB_FUNC_TS_COMP) {
    pCtx->param[0].i64Key = vgId;
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1840
  }
1841

1842 1843 1844 1845 1846 1847
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1848
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1849 1850 1851
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1852
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1853 1854 1855 1856 1857 1858
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1859
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1860 1861
  SQuery* pQuery = pRuntimeEnv->pQuery;

1862
  if (isSelectivityWithTagsQuery(pQuery)) {
1863
    int32_t num = 0;
1864
    int16_t tagLen = 0;
1865

1866
    SQLFunctionCtx *p = NULL;
1867
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1868 1869 1870
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1871

1872
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1873
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;
1874

1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1888 1889 1890 1891 1892
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
S
TD-1848  
Shengliang Guan 已提交
1893
      tfree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1894
    }
1895
  }
H
Haojun Liao 已提交
1896 1897

  return TSDB_CODE_SUCCESS;
1898 1899
}

1900
/*
 * Allocate and initialise the per-output-column state of the runtime
 * environment: the function contexts (pCtx), the output offsets (offset) and
 * the result cell offsets (rowCellInfoOffset).
 *
 * `order` is stored in param[2] of the top/bottom/diff contexts.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY after releasing
 * the three arrays when an allocation or setCtxTagColumnInfo() fails.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));

  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->offset = calloc(pQuery->numOfOutput, sizeof(int16_t));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));

  if (pRuntimeEnv->pCtx == NULL || pRuntimeEnv->offset == NULL || pRuntimeEnv->rowCellInfoOffset == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg    *pFuncMsg = &pQuery->pExpr1[i].base;
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex      *pColIndex = &pFuncMsg->colInfo;

    // the "require null" request travels in the column flag; consume it here
    if (TSDB_COL_REQ_NULL(pColIndex->flag)) {
      pCtx->requireNull = true;
      pColIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the input type/width from the kind of the source column
    int32_t index = pFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pColIndex->flag) && pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
      SSchema s = tGetTableNameColumnSchema();

      pCtx->inputBytes = s.bytes;
      pCtx->inputType = s.type;
    } else if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      pCtx->inputBytes = pQuery->tagColList[index].bytes;
      pCtx->inputType = pQuery->tagColList[index].type;
    } else if (TSDB_COL_IS_UD_COL(pColIndex->flag)) {
      pCtx->inputBytes = pFuncMsg->arg[0].argBytes;
      pCtx->inputType = pFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pExpr1[i].bytes;
    pCtx->outputType = pQuery->pExpr1[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pFuncMsg->functionId;
    pCtx->stableQuery = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pExpr1[i].interBytes;
    pCtx->start.key = INT64_MIN;
    pCtx->end.key = INT64_MIN;

    // copy the function arguments into the context parameters
    pCtx->numOfParams = pFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pFuncMsg->arg[j].argType;
      int16_t bytes = pFuncMsg->arg[j].argBytes;

      if (type != TSDB_DATA_TYPE_BINARY && type != TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pFuncMsg->arg[j].argValue.i64, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], pFuncMsg->arg[j].argValue.pz, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pExpr1[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[1].i64Key = pQuery->order.orderColId;

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;
    }

    // running offsets: each column starts where the previous one ends
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pExpr1[i - 1].interBytes;
    }
  }

  *(int64_t*) pRuntimeEnv->prevRow[0] = INT64_MIN;

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->pCtx);
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->rowCellInfoOffset);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026
/*
 * Release both tsdb query handles of a query and clear the pointers.
 * The memory reference of the query is asserted to be fully released.
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SMemRef*          pMemRef = &pQInfo->memRef;

  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);

  pEnv->pSecQueryHandle = NULL;
  pEnv->pQueryHandle = NULL;

  assert(pMemRef->ref == 0 && pMemRef->imem == NULL && pMemRef->mem == NULL);
}

2027 2028 2029 2030
/*
 * Release everything owned by the runtime environment of a query: result row
 * info, function contexts (parameters, tag value, tag context list), fill
 * info, result buffer, query handles, ts buffer, auxiliary arrays, the result
 * row hash table and the result row pool.
 *
 * Does nothing when the environment has no query attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);

  qDebug("QInfo:%p teardown runtime env", pQInfo);
  cleanupResultRowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    int32_t col = 0;
    while (col < pQuery->numOfOutput) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

      int32_t p = 0;
      while (p < pCtx->numOfParams) {
        tVariantDestroy(&pCtx->param[p]);
        ++p;
      }

      tVariantDestroy(&pCtx->tag);
      tfree(pCtx->tagInfo.pTagCtxList);

      ++col;
    }

    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);

  pRuntimeEnv->pTsBuf = tsBufDestroy(pRuntimeEnv->pTsBuf);

  // auxiliary arrays
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);
  tfree(pRuntimeEnv->prevRow);

  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

2071 2072 2073 2074
/* Tell whether a response context is attached to the query (rspContext is set). */
static bool needBuildResAfterQueryComplete(SQInfo* pQInfo) {
  if (pQInfo->rspContext == NULL) {
    return false;
  }

  return true;
}

H
Haojun Liao 已提交
2075
// true when the code of the query object (_q) is TSDB_CODE_TSC_QUERY_CANCELLED
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
2076

2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095
/*
 * Tell whether the execution of a query has to be aborted.
 *
 * Returns true when
 *  - the query has been cancelled (IS_QUERY_KILLED), or
 *  - the query has an owner, has been executing for more than
 *    getMaximumIdleDurationSec() seconds, and no response context is waiting
 *    for the result (the retrieve request has not arrived).
 * Returns false otherwise.
 */
static bool isQueryKilled(SQInfo *pQInfo) {
  if (IS_QUERY_KILLED(pQInfo)) {
    return true;
  }

  // query has been executed more than tsShellActivityTimer, and the retrieve has not arrived
  // abort current query execution.
  if (pQInfo->owner != 0 && ((taosGetTimestampSec() - pQInfo->startExecTs) > getMaximumIdleDurationSec()) &&
      (!needBuildResAfterQueryComplete(pQInfo))) {

    assert(pQInfo->startExecTs != 0);

    // report the idle limit that was actually exceeded instead of a fixed "1"
    qDebug("QInfo:%p retrieve not arrive beyond %d sec, abort current query execution, start:%"PRId64", current:%d", pQInfo,
           (int32_t) getMaximumIdleDurationSec(), pQInfo->startExecTs, taosGetTimestampSec());
    return true;
  }

  return false;
}

H
Haojun Liao 已提交
2096
/* Mark the query as cancelled; IS_QUERY_KILLED() reports true afterwards. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
2097

H
Haojun Liao 已提交
2098 2099 2100
/*
 * Tell whether the query is treated as a fixed output query.
 *
 * Interval queries never are. Top/bottom queries and group-by-normal-column
 * queries always are. Otherwise the query is fixed output as soon as one of
 * its functions is not a multi-output function; timestamp helper columns
 * (ts, ts_dummy and a leading projection of the primary timestamp with one
 * parameter) are not considered.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[i].base;

    // ignore the ts_comp function
    bool leadingTsProjection = (i == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool tsHelper = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || tsHelper) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

2130
// todo refactor with isLastRowQuery
2131
bool isPointInterpoQuery(SQuery *pQuery) {
2132
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2133 2134
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
    if (functionId == TSDB_FUNC_INTERP) {
2135 2136 2137
      return true;
    }
  }
2138

2139 2140 2141 2142
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
2143
static bool isSumAvgRateQuery(SQuery *pQuery) {
2144
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2145
    int32_t functionId = pQuery->pExpr1[i].base.functionId;
2146 2147 2148
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
2149

2150 2151 2152 2153 2154
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
2155

2156 2157 2158
  return false;
}

H
hjxilinx 已提交
2159
/* Tell whether any output column of the query uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    if (pQuery->pExpr1[i].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++i;
  }

  return false;
}

H
hjxilinx 已提交
2170
/*
 * Tell whether the query needs a second scan in the opposite direction.
 *
 * That is the case when a first/first_dst function appears in a descending
 * query, or when a last/last_dst function carries (in its first argument) a
 * scan order that differs from the order of the query. ts, ts_dummy and tag
 * columns are not considered.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    const int32_t fid = pQuery->pExpr1[i].base.functionId;

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (!isLast) {
      continue;
    }

    // the scan order to acquire the last result of the specified column
    int32_t order = (int32_t)pQuery->pExpr1[i].base.arg->argValue.i64;
    if (order != pQuery->order.order) {
      return true;
    }
  }

  return false;
}
H
hjxilinx 已提交
2192

H
Haojun Liao 已提交
2193 2194 2195 2196
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
2197 2198
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2199
    SExprInfo* pExprInfo = &pQuery->pExpr1[i];
H
Haojun Liao 已提交
2200 2201

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
2202 2203 2204 2205

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
2206
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
2207 2208 2209
      return false;
    }
  }
2210

H
hjxilinx 已提交
2211 2212 2213
  return true;
}

2214 2215
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
2216
/*
 * Compute the time window [win->skey, win->ekey] that contains `key`.
 *
 * The start is `key` truncated by taosTimeTruncate() according to the query
 * interval and precision. The end is one interval later minus 1; for the
 * interval units 'n' and 'y' it is computed with taosTimeAdd().
 *
 * If keyFirst > INT64_MAX - interval, the query range between keyFirst and
 * keyLast must be shorter than one interval, so the end is simply set to
 * INT64_MAX and no further adjustment is needed.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);

  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  if (keyFirst > (INT64_MAX - pQuery->interval.interval)) {
    assert(keyLast - keyFirst < pQuery->interval.interval);
    win->ekey = INT64_MAX;
    return;
  }

  switch (pQuery->interval.intervalUnit) {
    case 'n':
    case 'y':
      win->ekey = taosTimeAdd(win->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
      break;

    default:
      win->ekey = win->skey + pQuery->interval.interval - 1;
      break;
  }
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is cleared for top/bottom queries and for group-by-normal-column queries. Otherwise
 * it is set to 1 only when every output function, ignoring the TS / TS_DUMMY helper columns,
 * is flagged as multi-output in aAggs[].
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pExpr1[idx].base.functionId;

    // timestamp helper columns do not influence the decision
    bool tsColumn = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TS_DUMMY);
    if (tsColumn) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[funcId].nStatus);
    if (!multiOutput) {
      break;  // one single-output function is enough to disable the buffer check
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
2260
bool colIdCheck(SQuery *pQuery) {
2261 2262
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
2263
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
2264
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
2265 2266 2267
      return false;
    }
  }
2268

2269 2270 2271 2272 2273 2274
  return true;
}

/*
 * Check whether every output function of the query is either functId or functIdDst.
 * The helper columns TS, TS_DUMMY, TAG and TAG_DUMMY are skipped.
 *
 * TODO: also ignore avg/sum/min/max/count/stddev/top/bottom, for which the scan order
 * does not matter.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t cur = pQuery->pExpr1[idx].base.functionId;

    bool helperColumn = (cur == TSDB_FUNC_TS) || (cur == TSDB_FUNC_TS_DUMMY) ||
                        (cur == TSDB_FUNC_TAG) || (cur == TSDB_FUNC_TAG_DUMMY);

    if (!helperColumn && cur != functId && cur != functIdDst) {
      return false;
    }
  }

  return true;
}

// True when all output functions (ignoring ts/tag helper columns) are FIRST or FIRST_DST.
static bool onlyFirstQuery(SQuery *pQuery) { return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST); }

// True when all output functions (ignoring ts/tag helper columns) are LAST or LAST_DST.
static bool onlyLastQuery(SQuery *pQuery) { return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST); }

H
Haojun Liao 已提交
2295
// todo refactor, add iterator
2296 2297
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
2298
  for(int32_t i = 0; i < t; ++i) {
2299
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
2300 2301 2302

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
2303
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
2304

2305 2306 2307 2308
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
2309 2310 2311 2312
    }
  }
}

2313
/*
 * Adjust pQuery->order.order, and swap the query window when needed, so that the scan
 * direction suits the kind of query:
 *   - first/last-row queries:                     ascending
 *   - descending group-by-normal-column queries:  ascending
 *   - point interpolation without interval:       ascending
 *   - only first()/first_dst:                     ascending
 *   - only last()/last_dst:                       descending
 * For interval queries the first/last rule is applied only when stableQuery is true.
 *
 * pQueryMsg is not referenced by this function.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // log template shared by the branches that exchange the query range
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case where order-irrelevant query types are mixed with order-critical ones
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      // pQInfo is logged directly: GET_QINFO_ADDR() is only valid on the address of the
      // runtimeEnv member, and pQuery is not that address
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->interval.interval == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo, &pQuery->window);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
          doExchangeTimeWindow(pQInfo, &pQuery->window);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Choose the initial number of pages for the result buffer:
 *   - group by normal column: 128
 *   - interval query:         max(number of tables, 16), i.e. one page per table
 *   - otherwise:              1
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  int32_t pages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, minPagesForInterval));
  }

  assert(pages > 0);
  return pages;
}

2423 2424
/*
 * Compute the row size and page size of the intermediate result buffer.
 *
 * *rowsize receives pQuery->rowSize scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT().
 * *ps starts at DEFAULT_INTERN_BUF_PAGE_SIZE and is doubled until a page, after the
 * tFilePage header, can hold at least four rows.
 * pRuntimeEnv->numOfRowsPerPage is updated to the number of rows that fit in one page.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t minRowsPerPage = 4;
  int32_t headerSize = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // one page contains at least minRowsPerPage rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * minRowsPerPage) > (*ps) - headerSize; *ps = (*ps << 1u)) {
    // keep doubling the page size
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2440
// True for every data type except BINARY and NCHAR; used to decide whether block-level
// min/max pre-filtering can be applied to a filter column.
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2441

H
Haojun Liao 已提交
2442 2443 2444 2445
/*
 * Decide from the per-column block statistics whether the data block has to be loaded.
 *
 * Returns true when
 *   - no statistics are available, or there is neither a filter column nor a top/bottom query;
 *   - a filter column has no statistics entry or is of BINARY/NCHAR type;
 *   - a filter column is entirely NULL and one of its filters is the isNull filter;
 *   - any filter accepts the [min, max] range of its column.
 * If no filter asks for the block and this is a top/bottom query, the decision is delegated
 * to topbot_datablock_filter() for the first TOP/BOTTOM output. Otherwise returns false.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    // locate the statistics entry of this filter column
    int32_t index = -1;
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

    // no statistics data, load the true data block
    if (index == -1) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
      return true;
    }

    SDataStatis* pColStatis = &pDataStatis[index];

    // all data in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
        if (pFilterElem->fp == isNull_filter) {
          return true;
        }
      }

      continue;
    }

    float fMin = 0;
    float fMax = 0;
    char *pMin = (char *)&pColStatis->min;
    char *pMax = (char *)&pColStatis->max;

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
      // The min/max fields are read as double and narrowed to float before filtering.
      // memcpy() is used for the reinterpretation instead of a pointer cast, so the read
      // does not depend on the declared type of the field.
      double dMin = 0;
      double dMax = 0;
      memcpy(&dMin, &pColStatis->min, sizeof(dMin));
      memcpy(&dMax, &pColStatis->max, sizeof(dMax));

      fMin = (float)dMin;
      fMax = (float)dMax;
      pMin = (char *)&fMin;
      pMax = (char *)&fMax;
    }

    for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
      if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pExpr1[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        // NOTE(review): pDataStatis is indexed by the output index here, while the filter
        // loop above looks entries up by column id - confirm both layouts agree.
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2516 2517 2518 2519 2520 2521 2522 2523
/*
 * Check whether the time range of a data block crosses a time-window boundary.
 *
 * Ascending scan: returns true if the window containing the block start ends before the
 * block end, or if a following window starts inside (block start, block end].
 * Descending scan: the mirrored test, starting from the window containing the block end.
 * Returns false when no such boundary is found.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  // normalized query range, independent of the scan direction
  TSKEY rangeStart = pQuery->window.skey;
  TSKEY rangeEnd = pQuery->window.ekey;
  if (rangeEnd < rangeStart) {
    TSKEY swapped = rangeStart;
    rangeStart = rangeEnd;
    rangeEnd = swapped;
  }

  TSKEY blockStart = pBlockInfo->window.skey;
  TSKEY blockEnd = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockStart, rangeStart, rangeEnd, &w);
    assert(w.ekey >= blockStart);

    if (w.ekey < blockEnd) {
      return true;
    }

    for (;;) {
      getNextTimeWindow(pQuery, &w);
      if (w.skey > blockEnd) {
        return false;
      }

      assert(w.ekey > blockEnd);
      if (w.skey <= blockEnd && w.skey > blockStart) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, blockEnd, rangeStart, rangeEnd, &w);
  assert(w.skey <= blockEnd);

  if (w.skey > blockStart) {
    return true;
  }

  for (;;) {
    getNextTimeWindow(pQuery, &w);
    if (w.ekey < blockStart) {
      return false;
    }

    assert(w.skey < blockStart);
    if (w.ekey < blockEnd && w.ekey >= blockStart) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2565
/*
 * Work out how much of the current data block the query needs and load exactly that.
 *
 * On return *status is one of:
 *   BLK_DATA_NO_NEEDED     - nothing was loaded;
 *   BLK_DATA_STATIS_NEEDED - *pStatis was requested; if no statistics exist the full block
 *                            is loaded into *pDataBlock instead;
 *   BLK_DATA_ALL_NEEDED    - *pStatis was requested and *pDataBlock is loaded;
 *   BLK_DATA_DISCARD       - the block was rejected by needToLoadDataBlock(); the block data
 *                            is not loaded.
 *
 * Returns TSDB_CODE_SUCCESS, or terrno when loading the block data yields NULL.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  *status = BLK_DATA_NO_NEEDED;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  int64_t groupId = pQuery->current->groupIndex;

  SQueryCostInfo* pCost = &pRuntimeEnv->summary;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        SResultRow* pResult = NULL;

        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;
        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId) != TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of every output function; stop once the full block is needed
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pExpr1[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pCost->discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);
    pCost->loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pCost->totalCheckedRows += pBlockInfo->rows;

      // same failure handling as the full-load path below
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pCost->loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pCost->discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;

      // a discarded block is neither loaded nor counted as loaded/checked
      return TSDB_CODE_SUCCESS;
    }

    pCost->totalCheckedRows += pBlockInfo->rows;
    pCost->loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2651
/*
 * Binary search in an array of `num` TSKEY values stored at pValue.
 * (The search presumes the keys are sorted in ascending order - confirm with callers.)
 *
 *   order == TSDB_ORDER_DESC: returns the last index whose key is <= `key`,
 *                             or -1 when `key` is smaller than the first key.
 *   order == TSDB_ORDER_ASC:  returns the first index whose key is >= `key`,
 *                             or -1 when `key` is greater than the last key.
 *
 * Returns -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    for (;;) {
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;

      // here keys[lo] < key < keys[hi], so the range holds at least two elements
      mid = ((hi - lo + 1) >> 1) + lo;

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  for (;;) {
    if (key <= keys[lo]) return lo;
    if (key == keys[hi]) return hi;

    if (key > keys[hi]) {
      // the answer is the element right after the current range, if there is one
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    mid = ((hi - lo + 1) >> 1u) + lo;

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2714 2715 2716 2717 2718 2719 2720 2721
/*
 * Resize every output column buffer to hold `capacity` rows (plus the tFilePage header)
 * and point each function context at the start of its buffer.
 * Nothing is done when `capacity` is smaller than the current capacity.
 * On allocation failure the function leaves through longjmp with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    int32_t bytes = pQuery->pExpr1[col].bytes;
    assert(bytes > 0 && capacity > 0);

    char *resized = realloc(pQuery->sdata[col], bytes * capacity + sizeof(tFilePage));
    if (resized == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[col] = (tFilePage *)resized;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data;

    ++col;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2742
// TODO merge with enuserOutputBufferSimple
2743 2744 2745
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  SQuery* pQuery = pRuntimeEnv->pQuery;
2746
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol && !isFixedOutputQuery(pRuntimeEnv) && !isTSCompQuery(pQuery)) {
2747
    SResultRec *pRec = &pQuery->rec;
2748

2749
    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
S
TD-1057  
Shengliang Guan 已提交
2750 2751
      int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
      int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));
2752

2753
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2754
        int32_t bytes = pQuery->pExpr1[i].bytes;
H
Haojun Liao 已提交
2755 2756
        assert(bytes > 0 && newSize > 0);

2757
        char *tmp = realloc(pQuery->sdata[i], bytes * newSize + sizeof(tFilePage));
H
Haojun Liao 已提交
2758
        if (tmp == NULL) {
H
Haojun Liao 已提交
2759
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
2760
        } else {
2761
          memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
2762 2763
          pQuery->sdata[i] = (tFilePage *)tmp;
        }
H
Haojun Liao 已提交
2764

2765 2766
        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;
H
Haojun Liao 已提交
2767

H
Haojun Liao 已提交
2768
        int32_t functionId = pQuery->pExpr1[i].base.functionId;
2769 2770 2771 2772
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }
H
Haojun Liao 已提交
2773

2774
      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
2775
             newSize, pRec->capacity, newSize - pRec->rows);
2776

2777 2778 2779 2780 2781
      pRec->capacity = newSize;
    }
  }
}

2782 2783 2784 2785 2786
/*
 * For an interval query whose windowResInfo.prevSKey is still TSKEY_INITIAL_VAL, set prevSKey
 * to the start of the first time window, derived from the given data block:
 *   - ascending:  the window containing the block start key;
 *   - descending: the window containing the block end key.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow first = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &first);
  } else {
    // the first window is the one at the end of the block; it may extend beyond the last queried timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &first);
  }

  pWindowResInfo->prevSKey = first.skey;
}

2799 2800
/*
 * Iterate over the data blocks of the active query handle (primary handle during the master
 * scan, secondary handle otherwise) and apply the query functions to each block.
 *
 * The loop stops when the handle is exhausted, loading a block fails, or the query status
 * becomes QUERY_RESBUF_FULL / QUERY_COMPLETED. A killed query or a non-zero terrno leaves the
 * function through longjmp. If the result buffer is not full afterwards the query is marked
 * completed; for interval queries all result rows are closed. Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo  *pSummary = &pRuntimeEnv->summary;
  STableQueryInfo *pCurrent = pQuery->current;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  } else {
    pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    pSummary->totalBlocks += 1;

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pQueryHandle, &blockInfo, &pStatis, &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // skip the block: advance lastKey just past it in scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pSummary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    closeAllResultRows(&pRuntimeEnv->windowResInfo);
    pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2875
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2876
  tVariantDestroy(tag);
2877

2878
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2879
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2880
    assert(val != NULL);
2881

H
[td-90]  
Haojun Liao 已提交
2882
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2883
  } else {
2884
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2885 2886 2887 2888
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2889

H
hjxilinx 已提交
2890
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2891
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2892 2893 2894 2895
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2896
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2897
    } else {
H
Haojun Liao 已提交
2898 2899 2900 2901 2902
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2903
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2904
    }
2905
  }
2906 2907
}

2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919
/*
 * Linear lookup of a tag column by id in pTagColList (numOfTags entries).
 * Returns the matching entry, or NULL when no entry has the given colId.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo* pFound = NULL;

  for (int32_t idx = 0; idx < numOfTags && pFound == NULL; ++idx) {
    if (pTagColList[idx].colId == colId) {
      pFound = &pTagColList[idx];
    }
  }

  return pFound;
}

2920
/*
 * Load the tag values of pTable into the function contexts of the query.
 *
 *   - A query whose only output is TS_COMP: the tag column named by the first argument of
 *     that expression is stored in pCtx[0].tag.
 *   - Otherwise: every output expression flagged as a tag column gets its tag value, and, when
 *     a ts buffer exists and the first expression is a TS/PRJ on the primary timestamp column,
 *     the join tag named by its first argument is stored in pCtx[0].tag and logged.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SExprInfo *pFirstExpr = &pQuery->pExpr1[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      compTagId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo *pCompCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, compTagId);

    // NOTE(review): the lookup returns NULL for an unknown column id and the result is
    // dereferenced unchecked - confirm the id is always present in tagColList.
    doSetTagValueInParam(tsdb, pTable, compTagId, &pRuntimeEnv->pCtx[0].tag, pCompCol->type, pCompCol->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pLocalExprInfo = &pQuery->pExpr1[idx];

    // only expressions over tag columns carry a tag value
    if (TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;

  bool tsOrPrj = (pFuncMsg->functionId == TSDB_FUNC_TS) || (pFuncMsg->functionId == TSDB_FUNC_PRJ);
  if (!(tsOrPrj && pRuntimeEnv->pTsBuf != NULL && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX)) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      joinTagId = (int16_t)pFirstExpr->base.arg->argValue.i64;
  SColumnInfo *pJoinCol = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, joinTagId);

  // NOTE(review): same unchecked NULL result as in the TS_COMP path above.
  doSetTagValueInParam(tsdb, pTable, joinTagId, &pRuntimeEnv->pCtx[0].tag, pJoinCol->type, pJoinCol->bytes);

  int16_t tagType = pRuntimeEnv->pCtx[0].tag.nType;
  if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
           pFirstExpr->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz);
  } else {
    qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
           pFirstExpr->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64Key);
  }
}

2970
/*
 * Feed one intermediate result row (pWindowRes) into the merge routine of every output function.
 *
 * When mergeFlag is false, each function context first moves its output buffer forward by one
 * output cell, is switched to FIRST_STAGE_MERGE and re-initialized; the row is then merged.
 * The timestamp is stored in every context as nStartQueryTimestamp.
 */
static UNUSED_FUNC void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SResultRow *pWindowRes, bool mergeFlag) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage  = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId);

  // pass 1: set up the input/output position of every function context
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pc     = &pRuntimeEnv->pCtx[k];
    int32_t         funcId = pQuery->pExpr1[k].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf += pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pc->resultInfo);
      aAggs[funcId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, k, pWindowRes, pPage);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // tag columns: rebuild the tag variant from the content of the input buffer
    tVariantDestroy(&pc->tag);

    int32_t outType = pc->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // pass 2: run the merge function of every output except the dummy tag
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t funcId = pQuery->pExpr1[k].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pRuntimeEnv->pCtx[k]);
    }
  }
}

3014
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

3083
/*
 * Debug helper: dump numOfRows rows of the column-wise result pages to stdout,
 * one line per row and one tab-terminated field per output column.
 * Columns whose type is not handled by the switch print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery    = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pExpr1[col];

      // each column lives in its own page; rows are laid out back to back
      char *cell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          int32_t colType = pExpr->type;
          printBinaryData(pExpr->base.functionId, cell, colType);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
3118
  STableQueryInfo **pTableQueryInfo;
3119 3120
  int32_t          *rowIndex;
  int32_t           order;
3121 3122 3123
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
3124
  int32_t left  = *(int32_t *)pLeft;
3125
  int32_t right = *(int32_t *)pRight;
3126

3127
  SCompSupporter *  supporter = (SCompSupporter *)param;
3128

3129 3130
  int32_t leftPos  = supporter->rowIndex[left];
  int32_t rightPos = supporter->rowIndex[right];
3131

3132 3133 3134 3135
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
3136

3137 3138 3139 3140
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
3141

3142
  STableQueryInfo** pList = supporter->pTableQueryInfo;
3143

3144 3145 3146
  SResultRowInfo *pWindowResInfo1 = &(pList[left]->windowResInfo);
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
  TSKEY leftTimestamp = pWindowRes1->win.skey;
3147

3148
  SResultRowInfo *pWindowResInfo2 = &(pList[right]->windowResInfo);
3149
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
3150
  TSKEY rightTimestamp = pWindowRes2->win.skey;
3151

3152 3153 3154
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
3155

3156 3157 3158 3159 3160
  if (supporter->order == TSDB_ORDER_ASC) {
    return (leftTimestamp > rightTimestamp)? 1:-1;
  } else {
    return (leftTimestamp < rightTimestamp)? 1:-1;
  }
3161 3162
}

3163
/*
 * Merge the per-table results of table groups, starting at pQInfo->groupIndex, until one
 * group yields at least one row or all groups have been visited.
 *
 * groupIndex is advanced past every group that was merged. If every group has been consumed
 * and the row list is empty, the super table query is flagged as over. The elapsed time is
 * added to summary.firstStageMergeTime.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 if merging a group returned a negative code.
 */
int32_t mergeGroupResult(SQInfo *pQInfo) {
  int64_t        startUs     = taosGetTimestampUs();
  SGroupResInfo *pResInfo    = &pQInfo->groupResInfo;
  int32_t        totalGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  while (pQInfo->groupIndex < totalGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    if (mergeIntoGroupResultImpl(pResInfo, pGroup, pQInfo) < 0) {
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generated at least one row: stop and hand the rows back
    if (taosArrayGetSize(pResInfo->pRows) > 0) {
      break;
    }

    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);

    // empty group: reset the row list and the read cursor before trying the next one
    taosArrayClear(pResInfo->pRows);
    pResInfo->index = 0;
    pResInfo->rowId = 0;
  }

  bool allVisited = (pQInfo->groupIndex == totalGroups);
  if (allVisited && taosArrayGetSize(pResInfo->pRows) == 0) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - startUs;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, totalGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

3202 3203
static int32_t doCopyToSData(SQInfo *pQInfo, SResultRow **pRows, int32_t numOfRows, int32_t* index, int32_t orderType);

3204
/*
 * Copy the merged rows of the current group into the query output buffers and store the
 * number of copied rows in pQuery->rec.rows.
 *
 * If the current group has been fully consumed, the next non-empty group is merged first.
 * The function returns without copying when that merge fails, or when no group is left
 * (in which case the super table query is flagged as over).
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo *pResInfo = &pQInfo->groupResInfo;

  bool groupConsumed = (pResInfo->index >= taosArrayGetSize(pResInfo->pRows));
  if (groupConsumed) {
    // every row of the current group has been returned to the client, move to the next group
    if (mergeGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // nothing merged and no group left: all results have been sent
    int32_t totalGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (taosArrayGetSize(pResInfo->pRows) == 0 && pQInfo->groupIndex == totalGroups) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  int32_t numOfRows = (int32_t)taosArrayGetSize(pResInfo->pRows);
  pQuery->rec.rows = doCopyToSData(pQInfo, pResInfo->pRows->pData, numOfRows, &pResInfo->index, TSDB_ORDER_ASC);
}

3226 3227 3228
/*
 * Return the number of results held by pResultRow: the numOfRes of the first output column
 * that reports a positive count, or 0 if there is none.
 *
 * ts, tag and tagprj columns are skipped because they do not decide the number of output
 * rows; that number is decided by the main output columns.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pExpr1[col].base.functionId;

    bool decidesRowCount = (funcId != TSDB_FUNC_TS) && (funcId != TSDB_FUNC_TAG) && (funcId != TSDB_FUNC_TAGPRJ);
    if (!decidesRowCount) {
      continue;
    }

    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResultRow, col);
    assert(pCell != NULL);

    if (pCell->numOfRes > 0) {
      return pCell->numOfRes;
    }
  }

  return 0;
}

3251
/*
 * Merge the result rows of all tables of one group into pGroupResInfo->pRows.
 *
 * The tables that own at least one result row become the input sources of a loser tree
 * ordered by window start key (see tableResultComparFn). Rows that hold no result are
 * skipped, and a row whose start key equals the key of the previously visited row is
 * not appended again.
 *
 * pGroupResInfo - receives pointers to the merged SResultRow objects; pRows is created on demand
 * pTableList    - STableQueryInfo pointers of the tables in the group
 * pQInfo        - owning query; used for the kill check, logging and the error jump
 *
 * Returns TSDB_CODE_SUCCESS. Any failure (out of memory, query killed) does not return:
 * the temporary buffers are released and control is transferred through
 * longjmp(pRuntimeEnv->env, code).
 */
int32_t mergeIntoGroupResultImpl(SGroupResInfo* pGroupResInfo, SArray *pTableList, SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  bool              ascQuery = QUERY_IS_ASC_QUERY(pQuery);

  int32_t code = TSDB_CODE_SUCCESS;

  int32_t          *posList = NULL;
  SLoserTreeInfo   *pTree = NULL;
  STableQueryInfo **pTableQueryInfoList = NULL;

  size_t size = taosArrayGetSize(pTableList);
  if (pGroupResInfo->pRows == NULL) {
    pGroupResInfo->pRows = taosArrayInit(100, POINTER_BYTES);
    if (pGroupResInfo->pRows == NULL) {  // rows are pushed into this array below
      qError("QInfo:%p failed alloc memory", pQInfo);
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _end;
    }
  }

  posList = calloc(size, sizeof(int32_t));
  pTableQueryInfoList = malloc(POINTER_BYTES * size);

  // a zero-sized request may legitimately yield NULL; only a non-empty list can be out of memory
  if (size > 0 && (pTableQueryInfoList == NULL || posList == NULL)) {
    qError("QInfo:%p failed alloc memory", pQInfo);
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _end;
  }

  // only tables that produced at least one result row take part in the merge
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pTableList, i);
    if (item->windowResInfo.size > 0) {
      pTableQueryInfoList[numOfTables++] = item;
    }
  }

  // there is no data in current group
  if (numOfTables == 0) {
    goto _end;
  }

  SCompSupporter cs = {pTableQueryInfoList, posList, pQuery->order.order};

  int32_t ret = tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (ret != TSDB_CODE_SUCCESS) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _end;
  }

  int64_t lastTimestamp = ascQuery? INT64_MIN:INT64_MAX;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (isQueryKilled(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);
      code = TSDB_CODE_TSC_QUERY_CANCELLED;
      goto _end;
    }

    // the root of the loser tree identifies the source holding the next row in query order
    int32_t tableIndex = pTree->pNode[0].index;

    SResultRowInfo *pWindowResInfo = &pTableQueryInfoList[tableIndex]->windowResInfo;
    SResultRow     *pWindowRes = getResultRow(pWindowResInfo, cs.rowIndex[tableIndex]);

    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num > 0) {
      assert((pWindowRes->win.skey >= lastTimestamp && ascQuery) || (pWindowRes->win.skey <= lastTimestamp && !ascQuery));

      // a row with the same start key as the previous one is not appended again
      if (pWindowRes->win.skey != lastTimestamp) {
        taosArrayPush(pGroupResInfo->pRows, &pWindowRes);
        pWindowRes->numOfRows = (uint32_t) num;
      }

      lastTimestamp = pWindowRes->win.skey;
    }

    // move to the next row of the current source, empty rows are simply skipped
    if ((++cs.rowIndex[tableIndex]) >= pWindowResInfo->size) {
      cs.rowIndex[tableIndex] = -1;

      // all input sources are exhausted
      if ((--numOfTables) == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, tableIndex + pTree->numOfEntries);
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, (int32_t)pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

_end:
  tfree(pTableQueryInfoList);
  tfree(posList);
  tfree(pTree);

  if (code != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, code);
  }

  return code;
}

3366 3367 3368 3369
/*
 * Turn the scan state of one table around for the reverse scan: the time window is
 * narrowed to what has been visited and swapped, lastKey restarts at the new start key,
 * the cursor order is flipped and the current window index is placed on the last result row.
 * A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // the query order has been switched already, so the step follows the reverse direction
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // lastKey == win.skey means nothing was produced and the window is kept as it is.
  // NOTE: even if win.skey != lastKey, the results may not have been generated.
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  pTableQueryInfo->cur.vgroupIndex = -1;
  SWITCH_ORDER(pTableQueryInfo->cur.order);

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

H
Haojun Liao 已提交
3389
/*
 * Update the "complete" flag of every output cell in every closed result row of
 * pWindowResInfo before a reverse scan.
 *
 * first/first_dst with TSDB_ORDER_ASC and last/last_dst with TSDB_ORDER_DESC are marked
 * incomplete; every other function except ts and tag is marked complete. Rows that are
 * not closed, and ts/tag cells, are left untouched.
 */
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t r = 0; r < pWindowResInfo->size; ++r) {
    if (!getResultRowStatus(pWindowResInfo, r)) {
      continue;  // the row is not closed
    }

    SResultRow *pResultRow = getResultRow(pWindowResInfo, r);

    for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
      int32_t             funcId = pQuery->pExpr1[c].base.functionId;
      SResultRowCellInfo *pCell  = getResultCell(pRuntimeEnv, pResultRow, c);

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast  = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pCell->complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pCell->complete = true;
      }
    }
  }
}

3415 3416
/*
 * Decide which output functions take part in the reverse scan by updating the "complete"
 * flag of their result info.
 *
 * Group-by-normal-column and interval queries are handled row by row through
 * disableFuncInReverseScanImpl(); otherwise the result info attached to each function
 * context is updated directly, applying the same rule.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pRuntimeEnv, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple result of a table query: one result info per function context (todo refactor)
  for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
    int32_t             funcId = pQuery->pExpr1[c].base.functionId;
    SResultRowCellInfo *pInfo  = pRuntimeEnv->pCtx[c].resultInfo;

    if (pInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
    bool isLast  = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pInfo->complete = false;
    } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
      pInfo->complete = true;
    }
  }
}

/*
 * Prepare every table of every group for the reverse scan: the per-table query info is
 * turned around, and the resulting lastKey is copied into the matching entry of
 * tableGroupInfo.pGroupList.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t totalGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t g = 0; g < totalGroups; ++g) {
    SArray *pQueryInfoGroup = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeyInfoGroup   = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pQueryInfoGroup);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pQueryInfoGroup, k);
      updateTableQueryInfoForReverseScan(pQuery, pTableQueryInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle
      // and decide the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeyInfoGroup, k);
      pKeyInfo->lastKey = pTableQueryInfo->lastKey;

      // both lists are expected to describe the same table at the same position
      assert(pTableQueryInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3466
/* Flip the order of every output function context of the query. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t k = 0; k < numOfOutput; ++k) {
    SWITCH_ORDER(pCtxList[k].order);
  }
}

3473
/*
 * Initialize a result row: the cell info array is placed in the memory directly behind the
 * SResultRow header, and the page id and row id are set to -1.
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t initResultRow(SResultRow *pResultRow) {
  pResultRow->rowId  = -1;
  pResultRow->pageId = -1;

  // one SResultRow past the start, i.e. the first byte after the header
  pResultRow->pCellInfo = (SResultRowCellInfo *)(pResultRow + 1);

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3480
/*
 * Point every function context back at the start of its output column in pQuery->sdata,
 * attach it to a freshly reset result cell of the default result row, zero the output
 * column and finally re-run the function initializers.
 */
void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // the default result row is looked up with a zero key
  int32_t     tid = 0;
  int64_t     uid = 0;
  SResultRow *pDefaultRow =
      doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&tid, sizeof(tid), true, uid);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pExpr1[col].base.functionId;

    pc->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pDefaultRow, col);
    RESET_RESULT_INFO(pCell);
    pc->resultInfo = pCell;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    bool needTsBuf = (funcId == TSDB_FUNC_TOP) || (funcId == TSDB_FUNC_BOTTOM) || (funcId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pc->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pExpr1[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of the function contexts forward by `output` rows and reset
 * their result info. Must not be used when a diff function is part of the query.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pExpr1[col].base.functionId;

    assert(funcId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[funcId].nStatus)) {
      pc->aOutputBuf += pc->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    if (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      pc->ptsOutputBuf = (char *)pc->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pc->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and run the function's init routine
 * for each context whose result info has not been initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pExpr1[col].base.functionId;

    pc->currentStage = 0;

    SResultRowCellInfo *pCell = GET_RES_INFO(pc);
    if (!pCell->initialized) {
      aAggs[funcId].init(pc);
    }
  }
}

3555
/*
 * Apply the remaining limit.offset to the rows currently held in the output buffer.
 *
 * If the buffer holds no more rows than the offset, all of them are dropped, the offset is
 * reduced accordingly and the output buffers are reset. Otherwise the first `offset` rows
 * are removed by shifting the remaining rows to the front of every output column.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // case 1: every buffered row is skipped
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // case 2: only the leading rows are skipped, the rest is moved to the front
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pExpr1[col].base.functionId;
    int32_t         bytes = pc->outputBytes;
    char           *pColData = (char *)pQuery->sdata[col]->data;

    // source and destination may overlap, hence memmove
    memmove(pQuery->sdata[col]->data, pColData + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pc->aOutputBuf = pColData + pQuery->rec.rows * bytes;

    if (funcId == TSDB_FUNC_DIFF || funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      pc->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update the status of the query. QUERY_NOT_COMPLETED replaces the whole status; any other
 * value is OR-ed into the status after the QUERY_NOT_COMPLETED flag has been cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Run the xNextStep hook of every output function (ts excluded) and report whether at
 * least one of them is still incomplete, i.e. whether another scan is required.
 *
 * For group-by-normal-column and interval queries the hooks are run once per result row,
 * with the output buffers bound to that row first.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    scanAgain = false;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    SResultRowInfo *pRowInfo = &pRuntimeEnv->windowResInfo;

    for (int32_t r = 0; r < pRowInfo->size; ++r) {
      SResultRow *pRow = getResultRow(pRowInfo, r);
      setResultOutputBuf(pRuntimeEnv, pRow);

      for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
        int16_t funcId = pQuery->pExpr1[c].base.functionId;
        if (funcId != TSDB_FUNC_TS) {
          SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[c];
          aAggs[funcId].xNextStep(pc);

          if (!GET_RES_INFO(pc)->complete) {
            scanAgain = true;
          }
        }
      }
    }

    return scanAgain;
  }

  for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
    int16_t funcId = pQuery->pExpr1[c].base.functionId;
    if (funcId != TSDB_FUNC_TS) {
      SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[c];
      aAggs[funcId].xNextStep(pc);

      if (!GET_RES_INFO(pc)->complete) {
        scanAgain = true;
      }
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3647
/*
 * Take a snapshot of the current scan state: the query status, the current window index,
 * the query time window and the time window of the current table. `start` is recorded as
 * lastKey and as the start key of the saved table window.
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  // start must not lie beyond lastKey in the scan direction
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  SQueryStatusInfo snapshot = {0};
  snapshot.status      = pQuery->status;
  snapshot.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  snapshot.lastKey     = start;

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pCurrent->win);

  snapshot.curWindow.skey = start;

  return snapshot;
}

3668 3669 3670 3671
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * The ts buffer cursor is saved into pStatus and, if a ts buffer exists, its order is
 * flipped. The query window becomes the swapped window of the current scan, the query
 * order and the order of every function context are flipped, the functions taking part in
 * the reverse scan are selected, the per-table ranges are turned around and a new secondary
 * tsdb query handle is created. If the handle cannot be created, control is transferred
 * through longjmp(pRuntimeEnv->env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);  // save the cursor
  if (pRuntimeEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order);

    bool moved = tsBufNextPos(pRuntimeEnv->pTsBuf);
    assert(moved);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the switch, the window has to run in the direction of the new order
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  // the query condition is built before the per-table ranges are turned around below
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3710 3711
/*
 * Undo the changes made for the reverse scan: the query order and the order of the function
 * contexts are flipped back, the ts buffer cursor saved in pStatus is restored, the scan
 * flag returns to master scan, and lastKey, status and the time windows are restored from
 * the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTsBuf, &pStatus->cur);
  if (pRuntimeEnv->pTsBuf != NULL) {
    pRuntimeEnv->pTsBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // update the pQuery->window.skey and pQuery->window.ekey to limit the scan scope of sliding query
  // during reverse scan
  pQuery->status = pStatus->status;
  pCurrent->lastKey = pStatus->lastKey;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3732 3733 3734 3735 3736 3737 3738
/*
 * Set the lastKey of the first table of the first group to the start key of the
 * time window of pCond. The group info must hold exactly one table.
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);

  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);
  pFirstTable->lastKey = pCond->twindow.skey;
}

3739
/*
 * Scan the data blocks for the current table of the query.
 *
 * The first pass is the master scan. As long as needScanDataBlocksAgain() asks for it,
 * a new secondary query handle is opened on the saved window and the blocks are
 * scanned again (REPEAT_SCAN). Finally, if needReverseScan() holds, one additional
 * scan is executed between setEnvBeforeReverseScan()/clearEnvAfterReverseScan().
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) if the secondary handle cannot be
 * created or the query has been killed.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        forwarded to getQueryStatusInfo() when the start status is saved
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the query status, taken before anything is scanned
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;

      if (saved.lastKey == pCurrent->lastKey) {
        // the lastKey did not move, which means no data has been checked yet
        qDebug("QInfo:%p no results generated in this scan", pQInfo);
      } else {
        // the end of the saved window becomes the last key handled by the master scan
        saved.curWindow.ekey = pCurrent->lastKey - step;
      }

      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // leaving after a repeated scan: restore the status code of the master scan
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    // the previous secondary handle is replaced below
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    STsdbQueryCond cond = createTsdbQueryCond(pQuery, &saved.curWindow);
    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    // prepare the runtime environment for the repeated scan
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // give up if the query has been killed in the meantime
    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3814
/*
 * Invoke the xFinalize callback of every output column.
 *
 * For group-by-normal-column and interval queries the callbacks are invoked once per
 * closed result row of pRuntimeEnv->windowResInfo (rows that are not closed are
 * skipped), and the number of rows of that result row is updated afterwards. For all
 * other queries the callbacks are invoked exactly once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SResultRowInfo *pRowInfo = &pRuntimeEnv->windowResInfo;

  // all rows of a group by normal column query are closed before they are finalized
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllResultRows(pRowInfo);
  }

  for (int32_t row = 0; row < pRowInfo->size; ++row) {
    if (!isResultRowClosed(pRowInfo, row)) {
      continue;
    }

    SResultRow *pRow = pRowInfo->pResult[row];
    setResultOutputBuf(pRuntimeEnv, pRow);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * record the number of output rows of this result row; according to the original
     * note it usually is 1, except for the top and bottom query
     */
    pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Return true if at least one output column is produced by a function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3862
/*
 * Initialize the STableQueryInfo located in the caller supplied buffer.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table object stored in the info
 * @param win          time window of the table; win.skey is also the initial lastKey
 * @param buf          memory that is used as the STableQueryInfo
 * @return buf as STableQueryInfo, or NULL if initResultRowInfo() failed. buf is not
 *         released here in that case.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // windowResInfo is initialized for interval/groupby queries only, other aggregate queries do not touch it
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    const int32_t initialSize = 128;

    if (initResultRowInfo(&pInfo->windowResInfo, initialSize, TSDB_DATA_TYPE_INT) != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3886
/*
 * Destroy the tag value and clean up the result row info of the table query info.
 * The pTableQueryInfo object itself is not released here; NULL is accepted.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupResultRowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3898
 * @param pDataBlockInfo
3899
 */
H
Haojun Liao 已提交
3900
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3901
  SQueryRuntimeEnv *pRuntimeEnv     = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3902
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
3903
  SResultRowInfo   *pWindowResInfo  = &pRuntimeEnv->windowResInfo;
H
Haojun Liao 已提交
3904

H
Haojun Liao 已提交
3905 3906
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3907

H
Haojun Liao 已提交
3908
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
H
Haojun Liao 已提交
3909 3910
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3911

H
Haojun Liao 已提交
3912 3913 3914
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3915

3916
  int64_t uid = 0;
H
Haojun Liao 已提交
3917
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
3918
      sizeof(groupIndex), true, uid);
3919
  assert (pResultRow != NULL);
3920

3921 3922 3923 3924
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
3925 3926
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3927 3928 3929 3930
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3931

H
Haojun Liao 已提交
3932 3933
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
3934
  setResultOutputBuf(pRuntimeEnv, pResultRow);
3935 3936 3937
  initCtxOutputBuf(pRuntimeEnv);
}

3938
/*
 * Point the output buffer and the result cell of every function context to the
 * position that belongs to the given result row.
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    // top/bottom/diff use the output buffer of the first column as their timestamp output
    int32_t fid = pQuery->pExpr1[col].base.functionId;
    bool    needTsOutput = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * attach the result cell (output information and intermediate buffer);
     * not every query requires the intermediate result buffer, e.g. COUNT
     */
    pColCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);
  }
}

H
Haojun Liao 已提交
3961
/*
 * Bind every function context to the given result row and run the init callback of
 * the function for result cells that are not initialized yet. Contexts whose result
 * cell is both initialized and complete only get the result cell assigned.
 */
void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];

    pColCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);

    bool finished = pColCtx->resultInfo->initialized && pColCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
    pColCtx->currentStage = 0;

    // top/bottom/diff use the output buffer of the first column as their timestamp output
    int32_t fid = pColCtx->functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    if (!pColCtx->resultInfo->initialized) {
      aAggs[fid].init(pColCtx);
    }
  }
}

3989
/*
 * Set the tag values for the table and, if a ts buffer exists, position the ts buffer
 * for this table.
 *
 * On the first call for a table (cur.vgroupIndex == -1) the tag of the first function
 * context is copied into the table query info and the start position of that tag is
 * looked up in the ts buffer; the resulting cursor is kept in pTableQueryInfo->cur.
 * On later calls the kept cursor is put back into the ts buffer.
 *
 * @return 0 in all cases.
 * NOTE(review): the lookup-failure path returns `false`, which is 0 as well, so a
 * caller cannot tell that failure from success by the return value - confirm whether
 * an error code is intended there.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTsBuf == NULL) {
    return 0;
  }

  tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key);
      }

      return false;
    }

    // keep the cursor info of the current table
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTsBuf, &pTableQueryInfo->cur);
  }

  // the same message is reported for a fresh lookup and for a restored cursor
  if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
4045
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
4046 4047
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
4048
  STableQueryInfo *pTableQueryInfo = pQuery->current;
4049

4050 4051 4052
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
4053
    pTableQueryInfo->win.skey = key;
4054
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
4055

4056 4057 4058 4059 4060
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
4061

4062 4063 4064
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
4065
     * In ascending query, the key is the first qualified timestamp. However, in the descending order query, additional
4066 4067
     * operations involve.
     */
H
Haojun Liao 已提交
4068
    STimeWindow     w = TSWINDOW_INITIALIZER;
H
Haojun Liao 已提交
4069
    SResultRowInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
4070

H
Haojun Liao 已提交
4071 4072
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
4073
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
4074

4075 4076
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
4077
        assert(win.ekey == pQuery->window.ekey);
4078
      }
4079

4080
      pWindowResInfo->prevSKey = w.skey;
4081
    }
4082

4083
    pTableQueryInfo->queryRangeSet = 1;
4084
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
4085 4086 4087 4088
  }
}

/*
 * Return true if the status of at least one output function of the query has the
 * TSDB_FUNCSTATE_NEED_TS flag set.
 */
bool requireTimestamp(SQuery *pQuery) {
  bool required = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !required; col++) {
    int32_t fid = pQuery->pExpr1[col].base.functionId;
    required = ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
  }

  return required;
}

/*
 * Decide whether the primary timestamp column of the data block has to be loaded:
 * 1. the lastKey of the current table or the end key of the query window is located
 *    in the time window of the block, so the timestamps are needed to find the
 *    precise position;
 * 2. one of the output functions requires the timestamp, see requireTimestamp().
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

4112
/*
 * Copy the results of the given result rows into the output buffers (pQuery->sdata)
 * until all rows are copied or pQuery->rec.capacity rows are in the output buffers.
 *
 * The position is kept across calls: *index counts the result rows that are done,
 * and pQInfo->groupResInfo.rowId is the offset inside the result row that has been
 * copied only partially.
 *
 * @param pQInfo     query info
 * @param pRows      result rows to copy from
 * @param numOfRows  number of entries of pRows
 * @param index      in/out, number of result rows that are completely handled
 * @param orderType  TSDB_ORDER_ASC to walk pRows forwards, anything else to walk backwards
 * @return number of rows copied into the output buffers by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SResultRow **pRows, int32_t numOfRows, int32_t *index, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SGroupResInfo    *pGroupResInfo = &pQInfo->groupResInfo;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  const bool    asc = (orderType == TSDB_ORDER_ASC);
  const int32_t step = asc ? 1 : -1;
  const int32_t first = asc ? (*index) : (numOfRows - (*index) - 1);

  int32_t copied = 0;

  for (int32_t i = first; (i < numOfRows) && (i >= 0); i += step) {
    SResultRow *pRow = pRows[i];

    // an empty result row is counted as done right away
    if (pRow->numOfRows == 0) {
      (*index) += 1;
      pGroupResInfo->rowId = 0;
      continue;
    }

    int32_t toCopy = pRow->numOfRows - pGroupResInfo->rowId;
    int32_t srcOffset = pGroupResInfo->rowId;

    if (toCopy > pQuery->rec.capacity - copied) {
      // the remaining output space is not enough for this row, copy a part of it only
      toCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->rowId += toCopy;
    } else {
      // this result row is done, the next call starts with the following one
      pGroupResInfo->rowId = 0;
      (*index) += 1;
    }

    tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pRow->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * bytes;
      char *src = getPosInResultPage(pRuntimeEnv, col, pRow, pPage);
      memcpy(dst, src + srcOffset * bytes, bytes * toCopy);
    }

    copied += toCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
H
Haojun Liao 已提交
4186
void copyFromWindowResToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo) {
4187
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4188

4189
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4190
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo->pResult, pResultInfo->size, &pQInfo->groupIndex, orderType);
4191

4192 4193
  pQuery->rec.rows += numOfResult;
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4194 4195
}

H
Haojun Liao 已提交
4196
/*
 * For queries that are not interval queries: raise the number of rows of every result
 * row in pRuntimeEnv->windowResInfo to the largest number of results found among its
 * result cells. The cells of TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ are not
 * taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t row = 0; row < pRuntimeEnv->windowResInfo.size; ++row) {
    SResultRow *pRow = pRuntimeEnv->windowResInfo.pResult[row];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool counted = (fid != TSDB_FUNC_TS) && (fid != TSDB_FUNC_TAG) && (fid != TSDB_FUNC_TAGPRJ);
      if (counted) {
        SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pRow, col);
        pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pCell->numOfRes));
      }
    }
  }
}

H
Haojun Liao 已提交
4219
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4220
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4221
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4222
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4223

4224
  SResultRowInfo * pResultRowInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4225
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4226

H
Haojun Liao 已提交
4227
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
4228
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, pDataBlock);
4229
  } else {
4230
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, searchFn, pDataBlock);
4231
  }
H
Haojun Liao 已提交
4232 4233

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4234
    updateResultRowIndex(pResultRowInfo, pTableQueryInfo, QUERY_IS_ASC_QUERY(pQuery));
H
Haojun Liao 已提交
4235
  }
4236 4237
}

H
Haojun Liao 已提交
4238
/*
 * Check whether a table query still has results that are not returned to the client.
 *
 * @return false if the limit of the query is reached. For queries with fill (which are
 *         not point interpolation queries): true if the fill info still holds rows,
 *         or if the query is completed and filling up to the end of the query window
 *         produces rows. For all other queries: true if the query is completed, is a
 *         group-by-normal-column or interval query, and has result rows.
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillApplied = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillApplied) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // There are results not returned to client yet, so filling applied to the remain result is required firstly.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * No results remain in the fill info at this point.
   * If the query is not completed yet, the gaps between two result blocks are handled after the next data block
   * is retrieved from TSDB, so there is nothing to return now.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
   * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
   * first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getNumOfResWithFill(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

H
Haojun Liao 已提交
4279 4280 4281 4282
/*
 * Number of columns of the final result: the number of the second stage expressions
 * (pExpr2) if there are any, the number of output columns otherwise.
 */
static int16_t getNumOfFinalResCol(SQuery* pQuery) {
  if (pQuery->pExpr2 != NULL) {
    return pQuery->numOfExpr2;
  }

  return pQuery->numOfOutput;
}

4283
/*
 * Serialize the query results into the response buffer and update the query status.
 *
 * Layout written to data:
 *   1. the result columns, one after another, each bytes * numOfRows long
 *      (pExpr2 describes the columns if it exists, pExpr1 otherwise);
 *   2. the number of entries of pQInfo->arrTableIdInfo, int32_t in network byte order;
 *   3. one STableIdInfo per entry, the fields converted with htobe64()/htonl().
 *
 * data points to an arbitrary byte offset of the message, hence every multi-byte value
 * is built in a local object and then copied with memcpy() instead of being stored
 * through a casted pointer.
 *
 * After the copy, a completed query is set to QUERY_OVER if no more results are
 * available (super table query: IS_STASBLE_QUERY_OVER, table query:
 * !queryHasRemainResForTableQuery).
 *
 * @param pQInfo     query info
 * @param numOfRows  number of result rows to copy for each column
 * @param data       destination buffer; the caller has to provide enough space
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (pQuery->pExpr2 == NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      // the length is computed as size_t so that large results cannot overflow int32_t
      size_t len = (size_t)pQuery->pExpr1[col].bytes * (size_t)numOfRows;

      memmove(data, pQuery->sdata[col]->data, len);
      data += len;
    }
  } else {
    for (int32_t col = 0; col < pQuery->numOfExpr2; ++col) {
      size_t len = (size_t)pQuery->pExpr2[col].bytes * (size_t)numOfRows;

      memmove(data, pQuery->sdata[col]->data, len);
      data += len;
    }
  }

  int32_t numOfTables = (int32_t) taosHashGetSize(pQInfo->arrTableIdInfo);

  int32_t netNumOfTables = (int32_t) htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(int32_t));
  data += sizeof(int32_t);

  int32_t total = 0;
  STableIdInfo* item = taosHashIterate(pQInfo->arrTableIdInfo, NULL);

  while (item != NULL) {
    // cleared first, so that padding bytes (if there are any) are well defined in the message
    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));

    dst.uid = htobe64(item->uid);
    dst.tid = htonl(item->tid);
    dst.key = htobe64(item->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
    total++;

    qDebug("QInfo:%p set subscribe info, tid:%d, uid:%"PRIu64", skey:%"PRId64, pQInfo, item->tid, item->uid, item->key);
    item = taosHashIterate(pQInfo->arrTableIdInfo, item);
  }

  qDebug("QInfo:%p set %d subscribe info", pQInfo, total);

  // Check if query is completed or not for stable query or normal table query respectively.
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (IS_STASBLE_QUERY_OVER(pQInfo)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResForTableQuery(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
4338
/*
 * Generate filled results into pQuery->sdata and apply the offset of the query to them.
 *
 * Filled blocks are generated until the offset is used up or no more results remain:
 * a block that is completely covered by the offset is discarded; a block that is
 * partially covered has its remaining rows moved to the front of the pDst buffers.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         buffers whose rows are shifted when the offset cuts a block
 * @param numOfFilled  not used by this function
 * @return number of valid rows available after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = (int32_t)taosFillResultDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset left: everything that was generated is returned
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    // the offset ends inside this block: drop the leading rows, keep the rest
    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= (int32_t)pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pExpr1[col].bytes * pQuery->limit.offset,
                ret * pQuery->pExpr1[col].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // the whole block is covered by the offset: discard it
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
      return 0;
    }
  }
}

4382
/*
 * Complete the cost summary of the query (hash table size, elapsed time including the
 * first stage merge time, result row pool statistics) and write it to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pSummary = &pRuntimeEnv->summary;

  // memory taken by the result row hash table and by the table query info map
  uint64_t hashSize = taosHashGetMemSize(pRuntimeEnv->pResultRowHashTable);
  hashSize += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pSummary->hashSize = hashSize;

  // add the merge time
  pSummary->elapsedTime += pSummary->firstStageMergeTime;

  SResultRowPool* pPool = pRuntimeEnv->pool;
  pSummary->winInfoSize = getResultRowPoolMemSize(pPool);
  pSummary->numOfTimeWindows = getNumOfAllocatedResultRows(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->firstStageMergeTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pSummary->winInfoSize/1024.0,
      pSummary->numOfTimeWindows, pSummary->tableInfoSize/1024.0, pSummary->hashSize/1024.0);
}

4406 4407
/*
 * Consume the remaining offset of the query inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped and
 * the lastKey of the current table is moved one step past the block. Otherwise the
 * start position inside the block is computed from the offset, the lastKey becomes the
 * timestamp at that position, and the query functions are applied on the block.
 * The offset is 0 afterwards in both cases.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the block is consumed by the offset completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCurrent->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pCurrent->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  // first row to be handled: counted from the front in ascending order, from the back otherwise
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = (int32_t)pQuery->limit.offset;
  } else {
    pQuery->pos = pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // the first column of the retrieved block is read as the timestamp column
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // the scan continues from the timestamp at the start position, the offset is used up
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4441

4442 4443 4444 4445 4446
/**
 * Skip whole data blocks until the row offset of the query is consumed.
 *
 * Nothing is done when there is no positive offset or when the query has
 * filter columns (numOfFilterCols > 0). Otherwise blocks are iterated through
 * the query handle:
 *   - a block with fewer rows than the remaining offset is skipped entirely and
 *     lastKey of the current table is moved one step past the block boundary;
 *   - the first block that can absorb the remaining offset is handed to
 *     updateOffsetVal() and the iteration stops.
 *
 * The function leaves through longjmp(pRuntimeEnv->env, ...) when the query has
 * been killed, or when terrno is not TSDB_CODE_SUCCESS after the iteration.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo* pTableQueryInfo = pQuery->current;
  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    if (pQuery->limit.offset > blockInfo.rows) {
      // the whole block lies inside the offset: drop it and advance lastKey past it
      pQuery->limit.offset -= blockInfo.rows;
      pTableQueryInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
      pTableQueryInfo->lastKey += step;

      qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
             pQuery->limit.offset);
    } else {  // find the appropriate start position in the current block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }
  }

  // the block iteration may have stopped because of an error reported through terrno
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4480

H
Haojun Liao 已提交
4481 4482
/**
 * Finish the interval-offset skipping once pQuery->limit.offset has reached 0.
 *
 * The time window following *win is computed. If that window overlaps the
 * current data block, the block is loaded, the start position of the next
 * qualified window is located, the query/table start keys are reset to the
 * timestamp at that position and the query functions are applied on the block.
 * Otherwise only the query start key and prevSKey are moved to the next window.
 *
 * @param pRuntimeEnv      runtime environment of the query
 * @param win              the last skipped time window
 * @param pBlockInfo       description of the current data block
 * @param pTableQueryInfo  query info of the current table; win.skey is updated
 *                         when the block is loaded
 * @return the timestamp from which the query continues: the timestamp of the
 *         located row when the block is loaded, otherwise the start key of the
 *         next time window
 */
static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  assert(pQuery->limit.offset == 0);
  STimeWindow tw = *win;
  getNextTimeWindow(pQuery, &tw);

  if ((tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {

    // load the data block and check data remaining in current data block
    // TODO optimize performance
    SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
    SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

    // restart from the skipped window; getNextQualifiedWindow updates tw in place
    tw = *win;
    int32_t startPos =
        getNextQualifiedWindow(pRuntimeEnv, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
    assert(startPos >= 0);

    // set the abort info
    pQuery->pos = startPos;

    // reset the query start timestamp
    pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
    pQuery->window.skey = pTableQueryInfo->win.skey;
    TSKEY key = pTableQueryInfo->win.skey;

    pWindowResInfo->prevSKey = tw.skey;
    int32_t index = pRuntimeEnv->windowResInfo.curIndex;

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
    pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
           GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
           pQuery->current->lastKey);

    return key;
  }

  // the next window does not overlap the current block: only move the start keys
  pQuery->window.skey = tw.skey;
  pWindowResInfo->prevSKey = tw.skey;

  return tw.skey;
}

H
Haojun Liao 已提交
4531
/**
 * Apply the row offset of an interval query by skipping whole time windows.
 *
 * *start is first set to the start key of an unclosed result window (or to the
 * lastKey of the current table when every window is closed). If there is no
 * positive offset, or the query has filter columns, a ts buffer or fill info,
 * nothing else is done. Otherwise data blocks are iterated and
 * pQuery->limit.offset is decreased once per skipped time window; when it
 * reaches 0, doSkipIntervalProcess() computes the final start key.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when terrno is not
 * TSDB_CODE_SUCCESS after the block iteration.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        [out] timestamp from which the query should start
 * @return always true in the code visible here
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // get the first unclosed time window
  // NOTE(review): the loop has no break, so *start ends up holding the start key of the
  // LAST unclosed window in pResult; confirm whether that is intended.
  bool assign = false;
  for(int32_t i = 0; i < pRuntimeEnv->windowResInfo.size; ++i) {
    if (pRuntimeEnv->windowResInfo.pResult[i]->closed) {
      continue;
    }

    assign = true;
    *start = pRuntimeEnv->windowResInfo.pResult[i]->win.skey;
  }

  if (!assign) {
    *start = pQuery->current->lastKey;
  }

  assert(*start <= pQuery->current->lastKey);

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * 1. For an interval query without interpolation, forward one pQuery->interval.interval at a time,
   *    pQuery->limit.offset times. Since holes may exist in the data,
   *    pQuery->interval.interval * pQuery->limit.offset is not a valid shortcut.
   *    Otherwise, only forward pQuery->limit.offset number of points.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow w = TSWINDOW_INITIALIZER;

  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    // align the first window to the block: only once for an ascending scan,
    // for every block in a descending scan
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
        pWindowResInfo->prevSKey = w.skey;
      }
    } else {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);
      pWindowResInfo->prevSKey = w.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      STimeWindow tw = win;

      // the window is counted as skipped only when it ends inside the current block
      if ((win.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (win.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }

      // current window does not end in the current data block, try the next data block
      getNextTimeWindow(pQuery, &tw);
      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
        return true;
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

        // the window crosses the block boundary and was not counted above: count it now
        if ((win.ekey > blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (win.ekey < blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
          pQuery->limit.offset -= 1;
        }

        if (pQuery->limit.offset == 0) {
          *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
          return true;
        } else {
          tw = win;
          int32_t startPos =
              getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
          assert(startPos >= 0);

          // set the abort info
          pQuery->pos = startPos;
          pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
          pWindowResInfo->prevSKey = tw.skey;
          win = tw;
        }
      } else {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4647 4648
// Forward declaration (file-local): lets code placed before the definition call this function.
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4649
/**
 * Create the tsdb query handle (pRuntimeEnv->pQueryHandle) for the query.
 *
 * No handle is created, and TSDB_CODE_SUCCESS is returned, when the query only
 * reads tags, or when it is a super table query that is neither an interval
 * query nor a fixed-output query.
 *
 * Depending on the query type the handle is created by tsdbQueryLastRow(),
 * tsdbQueryRowsInExternalWindow() or tsdbQueryTables(). For a first/last-row
 * query, the query window and the window/lastKey of every table are updated
 * from the condition window after the handle has been created.
 *
 * @param tsdb           tsdb instance passed to the tsdbQuery* functions
 * @param pQInfo         query info object
 * @param isSTableQuery  true when the query runs against a super table
 * @return the value of terrno after the handle creation (terrno is reset to
 *         TSDB_CODE_SUCCESS right before the handle is created)
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // a plain ascending scan over exactly one normal table uses that table's own time window
  if (!isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv))
  ) {
    SArray* pa = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pCheckInfo = taosArrayGetP(pa, 0);
    cond.twindow = pCheckInfo->win;
  }

  terrno = TSDB_CODE_SUCCESS;
  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

    // update the query time window
    pQuery->window = cond.twindow;
    if (pQInfo->tableGroupInfo.numOfTables == 0) {
      pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    } else {
      // propagate the updated window to every table of every group
      size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
      for(size_t i = 0; i < numOfGroups; ++i) {
        SArray *group = GET_TABLEGROUP(pQInfo, i);

        size_t t = taosArrayGetSize(group);
        for (size_t j = 0; j < t; ++j) {
          STableQueryInfo *pCheckInfo = taosArrayGetP(group, j);

          pCheckInfo->win = pQuery->window;
          pCheckInfo->lastKey = pCheckInfo->win.skey;
        }
      }
    }
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  }

  return terrno;
}

H
Haojun Liao 已提交
4706
/**
 * Build the per-column fill description array for a fill query.
 *
 * One SFillColInfo is created for each final result column. The expressions are
 * taken from pQuery->pExpr2 when it is set, otherwise from pQuery->pExpr1. The
 * column offsets are the running sum of the column byte widths.
 *
 * @param pQuery  query object
 * @return array with getNumOfFinalResCol(pQuery) entries allocated with calloc
 *         (the code here does not free it), or NULL when the allocation fails
 */
static SFillColInfo* createFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = getNumOfFinalResCol(pQuery);
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  // TODO refactor
  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = (pQuery->pExpr2 == NULL)? &pQuery->pExpr1[i]:&pQuery->pExpr2[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = (int8_t)pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].tagIndex   = -2;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4733
/**
 * Initialize the runtime environment of a query.
 *
 * Steps, in order:
 *   1. cache query properties (top/bottom, tag output, time window interpolation)
 *      and apply the scan limitation;
 *   2. create the tsdb query handle through setupQueryHandle();
 *   3. store tsdb, vgId, the ts buffer and scan-related state in pQInfo/pRuntimeEnv;
 *   4. create the disk-based result buffer and the result row info that fit the
 *      query type;
 *   5. set up the runtime environment through setupQueryRuntimeEnv();
 *   6. create the fill info for fill queries that are not point interpolation queries;
 *   7. mark the query as QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query info object
 * @param pTsBuf         ts buffer, may be NULL
 * @param tsdb           tsdb instance
 * @param vgId           vgroup id stored in pQInfo
 * @param isSTableQuery  true when the query runs against a super table
 * @return TSDB_CODE_SUCCESS, or the first non-success code returned by one of
 *         the initialization steps
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);
  pRuntimeEnv->timeWindowInterpo = timeWindowInterpoRequired(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTsBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTsBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTsBuf, order);
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TENMB = 1024*1024*10;

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TENMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      code = initResultRowInfo(&pRuntimeEnv->windowResInfo, 8, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery) || (!isSTableQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TENMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initResultRowInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    // NOTE(review): createFillColInfo() returns NULL when its allocation fails; that result is
    // passed on unchecked here - confirm how taosInitFillInfo handles a NULL column array.
    SFillColInfo* pColInfo = createFillColInfo(pQuery);
    STimeWindow w = TSWINDOW_INITIALIZER;

    // the query window may be stored in descending order; normalize it before aligning
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    int32_t numOfCols = getNumOfFinalResCol(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, numOfCols,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo, pQInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4833
/**
 * Clear the "complete" flag of every output function so that the functions
 * can be executed again for the next table.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pResInfo != NULL) {
      pResInfo->complete = false;
    }
  }
}

H
Haojun Liao 已提交
4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854
/**
 * Prepare the execution context before a data block of the given table is processed.
 *
 * For a non-interval query the execution context is set for the group of the
 * table, using the key one step after the block end key. For an interval query
 * the interval query range is set from the block start key and, when tag
 * results are required or a ts buffer is present, the additional table
 * information is set as well.
 *
 * @param pQInfo           query info object
 * @param pTableQueryInfo  query info of the table that owns the block
 * @param pBlockInfo       description of the data block
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
  } else {  // interval query
    TSKEY nextKey = pBlockInfo->window.skey;
    setIntervalQueryRange(pQInfo, nextKey);

    if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }
  }
}

H
Haojun Liao 已提交
4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874
/**
 * Sanity check (assert only) of the time window bookkeeping of one table.
 *
 * Ascending query:  the table window is ordered skey <= ekey, lastKey is not
 * before the window start, and the table window lies inside the query window.
 * Descending query: the same conditions with every comparison reversed.
 *
 * @param pQuery           query object that provides the order and the query window
 * @param pTableQueryInfo  per-table query info to verify
 */
static void doTableQueryInfoTimeWindowCheck(SQuery* pQuery, STableQueryInfo* pTableQueryInfo) {
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    // descending scan: keys decrease from skey towards ekey
    assert(pTableQueryInfo->win.skey >= pTableQueryInfo->win.ekey);
    assert(pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey);
    assert(pTableQueryInfo->win.skey <= pQuery->window.skey);
    assert(pTableQueryInfo->win.ekey >= pQuery->window.ekey);
    return;
  }

  // ascending scan: keys increase from skey towards ekey
  assert(pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey);
  assert(pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey);
  assert(pTableQueryInfo->win.skey >= pQuery->window.skey);
  assert(pTableQueryInfo->win.ekey <= pQuery->window.ekey);
}

H
Haojun Liao 已提交
4875
/**
 * Scan all data blocks delivered by the query handle and apply the query
 * functions on each of them (multi-table processing).
 *
 * The primary query handle is used during the master scan, the secondary
 * handle otherwise. For every block the owning table is looked up by the tid of
 * the block; the scan stops when the table is unknown or when loading the block
 * fails. Blocks reported as BLK_DATA_DISCARD only advance lastKey of the table.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when the query has been killed,
 * or when terrno is not TSDB_CODE_SUCCESS after the scan.
 *
 * @param pQInfo  query info object
 * @return difference of the two taosGetTimestampMs() readings taken at the
 *         start and at the end of the scan
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo*   summary  = &pRuntimeEnv->summary;

  int64_t st = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;

    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // find the query info of the table that owns this block, keyed by tid
    STableQueryInfo **pTableQueryInfo = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if(pTableQueryInfo == NULL) {
      break;
    }

    pQuery->current = *pTableQueryInfo;
    doTableQueryInfoTimeWindowCheck(pQuery, *pTableQueryInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *pTableQueryInfo, &blockInfo);
    }

    uint32_t     status = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo, &pStatis, &pDataBlock, &status);
    if (ret != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // the block is not needed: only move lastKey one step past it
      pQuery->current->lastKey = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey + step : blockInfo.window.skey + step;
      continue;
    }

    summary->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  // the block iteration may have stopped because of an error reported through terrno
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  int64_t et = taosGetTimestampMs();
  return et - st;
}

4940 4941
/**
 * Prepare the runtime environment for querying one table of the first table group.
 *
 * The function
 *   - marks the query as QUERY_NOT_COMPLETED and sets the tag values when tag
 *     results are required or a ts buffer is present;
 *   - replaces pRuntimeEnv->pQueryHandle with a handle that covers only the
 *     selected table, in the range [lastKey, win.ekey] of that table;
 *   - when a ts buffer is present, positions the ts buffer at the data of the
 *     current tag value, or restores the saved cursor when the tag is unchanged;
 *   - initializes the output buffers of the function contexts.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when the query handle
 * cannot be created.
 *
 * @param pQInfo  query info object
 * @param index   index of the table inside table group 0
 * @return false when the ts buffer holds no data for the tag value of the
 *         table, true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }

  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);

  // continue from the last processed key of this table
  STsdbQueryCond cond = {
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list that holds one group with one table
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);

  taosArrayPush(g1, &tx);
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};

  // include only current table
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTsBuf != NULL) {
    tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

    if (pRuntimeEnv->cur.vgroupIndex == -1) {
      // no cursor saved yet: search the start position of this tag in the ts buffer
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, pTag);
      // failed to find data with the specified tag value and vnodeId
      if (!tsBufIsValidElem(&elem)) {
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
        }

        return false;
      } else {
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key,
                 cur.blockIndex, cur.tsIndex);
        }
      }
    } else {
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTsBuf);
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
        // the ts buffer currently points at another tag: search the start position of this tag

        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, pTag);
        // failed to find data with the specified tag value and vnodeId
        if (!tsBufIsValidElem(&elem1)) {
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key);
          }

          return false;
        } else {
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
          }
        }

      } else {
        // same tag as before: continue from the saved cursor
        tsBufSetCursor(pRuntimeEnv->pTsBuf, &pRuntimeEnv->cur);
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex);
        }
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

H
Haojun Liao 已提交
5050
/**
 * Build a tsdb query condition from the query object and a time window.
 *
 * The column list, the order and the number of columns are taken from pQuery;
 * the time window is copied from *win.
 *
 * @param pQuery  query object
 * @param win     time window to scan
 * @return the query condition, by value
 */
STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win) {
  STsdbQueryCond cond = {
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, *win);
  return cond;
}

/**
 * Build the id record (uid, tid, last processed key) of the current table.
 *
 * The record is created with an initializer so that every member not listed
 * here starts out zeroed; the result is later copied as a whole
 * (sizeof(STableIdInfo) bytes) by updateTableIdInfo().
 *
 * @param pQuery  query object; pQuery and pQuery->current must not be NULL
 * @return the id record of pQuery->current, by value
 */
static STableIdInfo createTableIdInfo(SQuery* pQuery) {
  assert(pQuery != NULL && pQuery->current != NULL);

  STableId* id = TSDB_TABLEID(pQuery->current->pTable);

  STableIdInfo tidInfo = {
      .uid = id->uid,
      .tid = id->tid,
      .key = pQuery->current->lastKey,
  };

  return tidInfo;
}

/**
 * Record the last processed key of the current table in the id hash.
 *
 * The hash is keyed by tid. An existing entry only gets its key refreshed;
 * otherwise a new entry is inserted.
 *
 * @param pQuery        query object whose current table is recorded
 * @param pTableIdInfo  hash table of STableIdInfo entries, keyed by tid
 */
static void updateTableIdInfo(SQuery* pQuery, SHashObj* pTableIdInfo) {
  STableIdInfo latest = createTableIdInfo(pQuery);
  STableIdInfo* pExisting = taosHashGet(pTableIdInfo, &latest.tid, sizeof(latest.tid));

  if (pExisting == NULL) {
    // first record for this table
    taosHashPut(pTableIdInfo, &latest.tid, sizeof(latest.tid), &latest, sizeof(STableIdInfo));
    return;
  }

  // same table: only the key moves forward
  assert(pExisting->tid == latest.tid && pExisting->uid == latest.uid);
  pExisting->key = latest.key;
}

5085 5086 5087 5088 5089 5090 5091
/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
5092
static void sequentialTableProcess(SQInfo *pQInfo) {
5093
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5094
  SQuery *          pQuery = pRuntimeEnv->pQuery;
5095
  setQueryStatus(pQuery, QUERY_COMPLETED);
5096

H
Haojun Liao 已提交
5097
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
5098

5099
  if (isPointInterpoQuery(pQuery)) {
H
Haojun Liao 已提交
5100
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
5101
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
5102

5103
    while (pQInfo->groupIndex < numOfGroups) {
5104
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
H
Haojun Liao 已提交
5105

5106 5107
      qDebug("QInfo:%p point interpolation query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo,
             pQInfo->groupIndex, numOfGroups, group);
H
Haojun Liao 已提交
5108
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5109

H
Haojun Liao 已提交
5110 5111 5112
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
5113

H
Haojun Liao 已提交
5114 5115 5116 5117 5118 5119 5120
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
5121

H
Haojun Liao 已提交
5122
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
5123 5124 5125 5126 5127 5128

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
5129

H
Haojun Liao 已提交
5130
      initCtxOutputBuf(pRuntimeEnv);
5131

5132
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5133
      assert(taosArrayGetSize(s) >= 1);
5134

5135
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
5136
      taosArrayDestroy(s);
H
Haojun Liao 已提交
5137

H
Haojun Liao 已提交
5138
      // here we simply set the first table as current table
5139
      SArray *first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
5140 5141
      pQuery->current = taosArrayGetP(first, 0);

5142
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5143

H
Haojun Liao 已提交
5144 5145 5146 5147 5148
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
5149

H
Haojun Liao 已提交
5150 5151 5152 5153 5154
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5155 5156 5157 5158 5159 5160

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
5161
  } else if (pRuntimeEnv->groupbyNormalCol) {  // group-by on normal columns query
5162
    while (pQInfo->groupIndex < numOfGroups) {
5163
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
5164

5165 5166
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex,
             numOfGroups);
5167

H
Haojun Liao 已提交
5168
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5169

5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5182
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
5183
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
5184

B
Bomin Zhang 已提交
5185 5186
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5187 5188 5189
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5190

5191
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5192 5193
      assert(taosArrayGetSize(s) >= 1);

5194
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5195 5196 5197 5198 5199

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

5200
      taosArrayDestroy(s);
5201

5202
      // no results generated for current group, continue to try the next group
5203
      SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
5204 5205 5206 5207 5208
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5209
        pWindowResInfo->pResult[i]->closed = true;  // enable return all results for group by normal columns
5210

H
Haojun Liao 已提交
5211
        SResultRow *pResult = pWindowResInfo->pResult[i];
5212
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5213
          SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, j);
5214
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
5215 5216 5217
        }
      }

5218
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5219
             pQInfo->groupIndex);
5220 5221 5222 5223 5224 5225
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5226
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5227

5228
      pQInfo->groupIndex = currentGroupIndex;  // restore the group index
5229
      assert(pQuery->rec.rows == pWindowResInfo->size);
5230
      resetResultRowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5231
      break;
5232
    }
H
Haojun Liao 已提交
5233
  } else if (pRuntimeEnv->queryWindowIdentical && pRuntimeEnv->pTsBuf == NULL && !isTSCompQuery(pQuery)) {
5234 5235 5236 5237 5238 5239 5240 5241 5242 5243
    //super table projection query with identical query time range for all tables.
    SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    SArray *group = GET_TABLEGROUP(pQInfo, 0);
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));

    void *pQueryHandle = pRuntimeEnv->pQueryHandle;
    if (pQueryHandle == NULL) {
H
Haojun Liao 已提交
5244
      STsdbQueryCond con = createTsdbQueryCond(pQuery, &pQuery->window);
5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &con, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
      pQueryHandle = pRuntimeEnv->pQueryHandle;
    }

    // skip blocks without load the actual data block from file if no filter condition present
    //    skipBlocks(&pQInfo->runtimeEnv);
    //    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    //      setQueryStatus(pQuery, QUERY_COMPLETED);
    //      return;
    //    }

H
Haojun Liao 已提交
5256 5257 5258 5259 5260 5261
    if (pQuery->prjInfo.vgroupLimit != -1) {
      assert(pQuery->limit.limit == -1 && pQuery->limit.offset == 0);
    } else if (pQuery->limit.limit != -1) {
      assert(pQuery->prjInfo.vgroupLimit == -1);
    }

5262
    bool hasMoreBlock = true;
H
Haojun Liao 已提交
5263
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
5264 5265 5266 5267
    SQueryCostInfo *summary = &pRuntimeEnv->summary;
    while ((hasMoreBlock = tsdbNextDataBlock(pQueryHandle)) == true) {
      summary->totalBlocks += 1;

5268
      if (isQueryKilled(pQInfo)) {
5269 5270 5271 5272 5273
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
      }

      tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
      STableQueryInfo **pTableQueryInfo =
H
Haojun Liao 已提交
5274
          (STableQueryInfo **) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
5275 5276 5277 5278 5279
      if (pTableQueryInfo == NULL) {
        break;
      }

      pQuery->current = *pTableQueryInfo;
H
Haojun Liao 已提交
5280
      doTableQueryInfoTimeWindowCheck(pQuery, *pTableQueryInfo);
5281 5282 5283 5284 5285

      if (pRuntimeEnv->hasTagResults) {
        setTagVal(pRuntimeEnv, pQuery->current->pTable, pQInfo->tsdb);
      }

H
Haojun Liao 已提交
5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->current->windowResInfo.size > pQuery->prjInfo.vgroupLimit) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }

      // it is a super table ordered projection query, check for the number of output for each vgroup
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->rec.rows >= pQuery->prjInfo.vgroupLimit) {
        if (QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.skey >= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        } else if (!QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.ekey <= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        }
      }

5305 5306
      uint32_t     status = 0;
      SDataStatis *pStatis = NULL;
5307
      SArray      *pDataBlock = NULL;
5308 5309 5310 5311 5312 5313 5314

      int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                          &pStatis, &pDataBlock, &status);
      if (ret != TSDB_CODE_SUCCESS) {
        break;
      }

H
Haojun Liao 已提交
5315 5316 5317 5318 5319
      if(status == BLK_DATA_DISCARD) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }
5320

H
Haojun Liao 已提交
5321
      ensureOutputBuffer(pRuntimeEnv, &blockInfo);
H
Haojun Liao 已提交
5322 5323
      int64_t prev = getNumOfResult(pRuntimeEnv);

5324 5325 5326 5327 5328 5329 5330 5331 5332 5333
      pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : blockInfo.rows - 1;
      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

      summary->totalRows += blockInfo.rows;
      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes,
             pQuery->current->lastKey);

      pQuery->rec.rows = getNumOfResult(pRuntimeEnv);

H
Haojun Liao 已提交
5334
      int64_t inc = pQuery->rec.rows - prev;
H
Haojun Liao 已提交
5335
      pQuery->current->windowResInfo.size += (int32_t) inc;
H
Haojun Liao 已提交
5336

5337 5338 5339 5340 5341
      // the flag may be set by tableApplyFunctionsOnBlock, clear it here
      CLEAR_QUERY_STATUS(pQuery, QUERY_COMPLETED);

      updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);

H
Haojun Liao 已提交
5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357
      if (pQuery->prjInfo.vgroupLimit >= 0) {
        if (((pQuery->rec.rows + pQuery->rec.total) < pQuery->prjInfo.vgroupLimit) || ((pQuery->rec.rows + pQuery->rec.total) > pQuery->prjInfo.vgroupLimit && prev < pQuery->prjInfo.vgroupLimit)) {
          if (QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts < blockInfo.window.ekey) {
            pQuery->prjInfo.ts = blockInfo.window.ekey;
          } else if (!QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts > blockInfo.window.skey) {
            pQuery->prjInfo.ts = blockInfo.window.skey;
          }
        }
      } else {
        // the limitation of output result is reached, set the query completed
        skipResults(pRuntimeEnv);
        if (limitResults(pRuntimeEnv)) {
          setQueryStatus(pQuery, QUERY_COMPLETED);
          SET_STABLE_QUERY_OVER(pQInfo);
          break;
        }
5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369
      }

      // while the output buffer is full or limit/offset is applied, query may be paused here
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL|QUERY_COMPLETED)) {
        break;
      }
    }

    if (!hasMoreBlock) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      SET_STABLE_QUERY_OVER(pQInfo);
    }
5370 5371
  } else {
    /*
5372 5373 5374
     * the following two cases handled here.
     * 1. ts-comp query, and 2. the super table projection query with different query time range for each table.
     * If the subgroup index is larger than 0, results generated by group by tbname,k is existed.
5375 5376
     * we need to return it to client in the first place.
     */
5377
    if (pQInfo->groupIndex > 0) {
5378
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5379
      pQuery->rec.total += pQuery->rec.rows;
5380

5381
      if (pQuery->rec.rows > 0) {
5382 5383 5384
        return;
      }
    }
5385

5386
    // all data have returned already
5387
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5388 5389
      return;
    }
5390

H
Haojun Liao 已提交
5391
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
H
Haojun Liao 已提交
5392
    resetResultRowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5393

H
Haojun Liao 已提交
5394
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5395 5396
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5397

5398
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
5399
      if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5400
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5401
      }
5402

5403
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5404
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5405
        pQInfo->tableIndex++;
5406 5407
        continue;
      }
5408

H
hjxilinx 已提交
5409
      // TODO handle the limit offset problem
5410
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5411 5412
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5413 5414 5415
          continue;
        }
      }
5416

5417
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5418
      skipResults(pRuntimeEnv);
5419

5420
      // the limitation of output result is reached, set the query completed
5421
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5422
        SET_STABLE_QUERY_OVER(pQInfo);
5423 5424
        break;
      }
5425

5426 5427
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5428

5429
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5430 5431 5432 5433 5434 5435
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5436
        pQInfo->tableIndex++;
5437
        updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5438

5439
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5440
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5441 5442
          break;
        }
5443

H
Haojun Liao 已提交
5444 5445
        if (pRuntimeEnv->pTsBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTsBuf->cur;
H
Haojun Liao 已提交
5446 5447
        }

5448
      } else {
5449
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5450 5451
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5452 5453
          continue;
        } else {
5454 5455 5456
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5457 5458 5459
        }
      }
    }
H
Haojun Liao 已提交
5460

5461
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5462 5463
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5464

5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478
    /*
     * 1. super table projection query, group-by on normal columns query, ts-comp query
     * 2. point interpolation query, last row query
     *
     * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
     * since the finalize stage will be done at the client side.
     *
     * projection query, point interpolation query do not need the finalizer.
     *
     * Only the ts-comp query requires the finalizer function to be executed here.
     */
    if (isTSCompQuery(pQuery)) {
      finalizeQueryResult(pRuntimeEnv);
    }
5479

H
Haojun Liao 已提交
5480 5481
    if (pRuntimeEnv->pTsBuf != NULL) {
      pRuntimeEnv->cur = pRuntimeEnv->pTsBuf->cur;
5482
    }
5483

5484 5485 5486 5487 5488
    qDebug("QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64
           " points returned, total:%" PRId64 ", offset:%" PRId64,
           pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows,
           pQuery->rec.total, pQuery->limit.offset);
  }
5489 5490
}

5491 5492 5493 5494
/**
 * Switch the query into reverse-scan mode and open the secondary query handle for it.
 *
 * The scan flag is set to reverse, the query window bounds are swapped, and the scan order
 * (plus the order of the ts-buffer cursor, when a ts buffer exists) is switched.
 * doRestoreContext() applies the same swap/switch again after the reverse scan.
 *
 * Jumps with longjmp(pRuntimeEnv->env, terrno) when the secondary handle cannot be created.
 *
 * @param pQInfo  query instance to prepare for the reverse scan
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order);
  }

  // built from the already swapped window and switched order
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // clean unused handle; reset the pointer so that nothing running before the handle is
  // re-created below can observe (or release again) the stale one
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

5522 5523 5524 5525
/**
 * Switch the query back from reverse-scan mode to the master scan.
 *
 * Swaps the window bounds and switches the scan order (and the ts-buffer cursor order,
 * when a ts buffer exists) once more, then switches the function contexts and sets
 * the master-scan flag.
 *
 * @param pQInfo  query instance that has just finished its reverse scan
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  // exchange the window bounds and flip the direction again
  SWAP(pQry->window.skey, pQry->window.ekey, TSKEY);
  SWITCH_ORDER(pQry->order.order);

  // the ts-buffer cursor, if there is one, follows the scan direction
  if (pEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pEnv->pTsBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5537 5538 5539
/**
 * Close every result row once a scan has finished.
 *
 * For an interval query the result rows are kept per table, so the rows of every table
 * in every group are closed; otherwise the rows held by the runtime environment are closed.
 *
 * @param pQInfo  query instance whose result rows are closed
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool isIntervalQuery = QUERY_IS_INTERVAL_QUERY(pQuery);
  if (!isIntervalQuery) {
    // close results for group result
    closeAllResultRows(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t groupIdx = 0; groupIdx < numOfGroup; ++groupIdx) {
    SArray *tables = GET_TABLEGROUP(pQInfo, groupIdx);
    size_t  numOfTables = taosArrayGetSize(tables);

    for (int32_t tableIdx = 0; tableIdx < numOfTables; ++tableIdx) {
      STableQueryInfo* pTableQueryInfo = taosArrayGetP(tables, tableIdx);
      closeAllResultRows(&pTableQueryInfo->windowResInfo);
    }
  }
}

/**
 * Run a query over multiple tables: master scan, optional reverse scan, then copy
 * the results into the output buffer.
 *
 * When pQInfo->groupIndex is already positive, no scan is executed and only the next
 * batch of results is copied out.
 *
 * Jumps with longjmp(env, TSDB_CODE_TSC_QUERY_CANCELLED) when pQInfo->code reports an
 * error or the query has been killed.
 *
 * @param pQInfo  query instance to execute
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  /*
   * if the groupIndex > 0, the query process must be completed yet, we only need to
   * copy the data into output buffer
   */
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: do check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    //TODO finalizeQueryResult may cause SEGSEV, since the memory may not allocated yet, add a cleanup function instead
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // interval results and sum/avg rate results are merged per group before they are copied
  bool needMerge = QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery);
  if (!needMerge) {  // not a interval query
    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  } else if (mergeGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pEnv, pQuery->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

H
Haojun Liao 已提交
5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643

static char *getArithemicInputSrc(void *param, const char *name, int32_t colId) {
  SArithmeticSupport *pSupport = (SArithmeticSupport *) param;
  SExprInfo* pExprInfo = (SExprInfo*) pSupport->exprList;

  int32_t index = -1;
  for (int32_t i = 0; i < pSupport->numOfCols; ++i) {
    if (colId == pExprInfo[i].base.resColId) {
      index = i;
      break;
    }
  }

  assert(index >= 0 && index < pSupport->numOfCols);
  return pSupport->data[index] + pSupport->offset * pExprInfo[index].bytes;
}

/**
 * Evaluate the second-stage expressions (pQuery->pExpr2) on top of the first-stage results.
 *
 * Every second-stage column is computed into a temporary page first, because the
 * first-stage results in pQuery->sdata are still needed as input while computing.
 * The temporary pages are copied over pQuery->sdata only after all columns are done.
 * Does nothing when the query has no second-stage expressions.
 *
 * NOTE(review): allocation results are not checked here.
 *
 * @param pQuery  query whose sdata holds pQuery->rec.rows rows of first-stage results
 */
static void doSecondaryArithmeticProcess(SQuery* pQuery) {
  if (pQuery->numOfExpr2 == 0) {
    return;
  }

  int64_t numOfRows = pQuery->rec.rows;

  SArithmeticSupport arithSup = {0};

  // one temporary output page per second-stage expression
  tFilePage **pOutput = calloc(pQuery->numOfExpr2, POINTER_BYTES);
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    int32_t width = pQuery->pExpr2[i].bytes;
    pOutput[i] = (tFilePage *)malloc((size_t)(width * numOfRows) + sizeof(tFilePage));
  }

  // the first-stage result columns are the input of the arithmetic expressions
  arithSup.offset = 0;
  arithSup.numOfCols = (int32_t)pQuery->numOfOutput;
  arithSup.exprList  = pQuery->pExpr1;
  arithSup.data      = calloc(arithSup.numOfCols, POINTER_BYTES);

  for (int32_t col = 0; col < arithSup.numOfCols; ++col) {
    arithSup.data[col] = pQuery->sdata[col]->data;
  }

  for (int i = 0; i < pQuery->numOfExpr2; ++i) {
    SExprInfo   *pExpr = &pQuery->pExpr2[i];
    SSqlFuncMsg *pFunc = &pExpr->base;

    if (pFunc->functionId == TSDB_FUNC_ARITHM) {
      // calculate the result from several other columns
      arithSup.pArithExpr = pExpr;
      tExprTreeCalcTraverse(arithSup.pArithExpr->pExpr, (int32_t)numOfRows, pOutput[i]->data, &arithSup, TSDB_ORDER_ASC,
                            getArithemicInputSrc);
      continue;
    }

    // plain column: take over the first-stage column with the same function and column id
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      SSqlFuncMsg *pSrc = &pQuery->pExpr1[j].base;
      if (pFunc->functionId != pSrc->functionId || pFunc->colInfo.colId != pSrc->colInfo.colId) {
        continue;
      }

      memcpy(pOutput[i]->data, pQuery->sdata[j]->data, (size_t)(pQuery->pExpr1[j].bytes * numOfRows));
      break;
    }
  }

  // all columns are computed: publish them and release the temporary pages
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    memcpy(pQuery->sdata[i]->data, pOutput[i]->data, (size_t)(pQuery->pExpr2[i].bytes * numOfRows));
    tfree(pOutput[i]);
  }

  tfree(pOutput);
  tfree(arithSup.data);
}

5697 5698 5699 5700 5701 5702
/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
5703
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5704
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5705

H
hjxilinx 已提交
5706
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5707
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5708 5709
    return;
  }
5710

H
hjxilinx 已提交
5711
  pQuery->current = pTableInfo;  // set current query table info
5712

5713
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5714
  finalizeQueryResult(pRuntimeEnv);
5715

H
Haojun Liao 已提交
5716 5717 5718 5719
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
  doSecondaryArithmeticProcess(pQuery);

5720
  if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5721
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5722
  }
5723

5724
  skipResults(pRuntimeEnv);
5725
  limitResults(pRuntimeEnv);
5726 5727
}

H
hjxilinx 已提交
5728
/**
 * Handler for single-table queries that may return their result in several rounds.
 *
 * The table is scanned repeatedly until a round yields at least one row or the query is
 * completed; rounds whose rows are all consumed by the offset are discarded. When the
 * query completes, the table's progress is recorded in pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query instance to execute
 * @param pTableInfo  the table to query; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // go through the shared helper: it refreshes an already recorded entry in place instead
    // of depending on how the hash table treats a second put with the same tid
    updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

5783
// handle time interval query on table
H
hjxilinx 已提交
5784
/*
 * Handle an interval (down-sampling) or group-by-normal-column query on one table.
 *
 * Binds pTableInfo as the current table of the query, scans its data blocks,
 * finalizes the window results and then copies them to the output buffer,
 * either directly (no fill) or through the fill (interpolation) routine.
 *
 * @param pQInfo      query handle; its runtimeEnv and groupIndex are used/updated
 * @param pTableInfo  per-table query state that becomes pQuery->current
 */
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->current = pTableInfo;

  TSKEY scanStartKey = TSKEY_INITIAL_VAL;

  // without group-by-normal-column, let skipTimeInterval() consume the offset first
  if (!pRuntimeEnv->groupbyNormalCol) {
    skipTimeInterval(pRuntimeEnv, &scanStartKey);

    // offset still pending with no filter and no fill: the query is done
    bool offsetRemains = (pQuery->limit.offset > 0) && (pQuery->numOfFilterCols == 0) &&
                         (pRuntimeEnv->pFillInfo == NULL);
    if (offsetRemains) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      return;
    }
  }

  scanOneTableDataBlocks(pRuntimeEnv, scanStartKey);
  assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));

  finalizeQueryResult(pRuntimeEnv);

  // the output row counter is rebuilt below from the window results
  pQuery->rec.rows = 0;

  if (pQuery->fillType != TSDB_FILL_NONE) {
    // fill requested: copy the raw results, then feed them to the fill routine
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    doSecondaryArithmeticProcess(pQuery);

    taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
    taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (const tFilePage **)pQuery->sdata);

    int32_t numOfFilled = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      limitResults(pRuntimeEnv);
    }

    return;
  }

  // no fill: nothing to return when the offset exceeds the number of closed windows
  int32_t closedRows = numOfClosedResultRows(&pRuntimeEnv->windowResInfo);
  if (pQuery->limit.offset > closedRows) {
    return;
  }

  // start copying from the first window after the skipped ones
  pQInfo->groupIndex = (int32_t) pQuery->limit.offset;

  copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  doSecondaryArithmeticProcess(pQuery);

  limitResults(pRuntimeEnv);
}

5839 5840 5841 5842
/*
 * Execute (or continue) a query on a single table.
 *
 * If results of a previous round are still pending, they are returned first:
 * via the fill routine for fill queries, or by copying the remaining window
 * results otherwise. If nothing is pending, the query is dispatched to the
 * interval, fixed-output or multi-output processor and the elapsed time is
 * added to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResForTableQuery(pRuntimeEnv)) {
    if (pQuery->fillType == TSDB_FILL_NONE) {
      // remaining window (group) results: copy the next batch to the output buffer
      pQuery->rec.rows = 0;
      assert(pRuntimeEnv->windowResInfo.size > 0);

      if (pQInfo->groupIndex < pRuntimeEnv->windowResInfo.size) {
        copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      }

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %" PRId64 " rows returned from group results, total:%" PRId64 "", pQInfo, pQuery->rec.rows,
               pQuery->rec.total);
      }

      // no data remains
      if (pQuery->rec.rows <= 0 || pRuntimeEnv->windowResInfo.size <= pQInfo->groupIndex) {
        qDebug("QInfo:%p query over, %" PRId64 " rows are returned", pQInfo, pQuery->rec.total);
      }

      return;
    }

    /*
     * Results are still pending because of result interpolation, so keep
     * producing them here instead of launching a new retrieve round.
     */
    int32_t numOfFilled = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

    if (pQuery->rec.rows > 0) {
      limitResults(pRuntimeEnv);
    }

    qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  // number of rows returned during this round
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray*          pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pTable = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window and interval queries share the interval processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5903
/*
 * Execute one round of a super-table query.
 *
 * Interval queries, and fixed-output queries that are neither point
 * interpolation nor group-by-normal-column, go to multiTableQueryProcess();
 * everything else is processed table by table in sequentialTableProcess().
 * The elapsed time of the round is added to the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) && (!pRuntimeEnv->groupbyNormalCol));

  if (!useMultiTableProcess) {
    assert((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5924
/*
 * Locate the source column referenced by an expression.
 *
 * @param pQueryMsg  query message holding numOfTags, numOfCols and colList
 * @param pExprMsg   expression whose colInfo (flag, colId) is looked up
 * @param pTagCols   tag column list, searched when the expression refers to a tag
 * @return index into pTagCols (tag column) or pQueryMsg->colList (normal column),
 *         TSDB_TBNAME_COLUMN_INDEX for the table-name pseudo tag, or
 *         TSDB_UD_COLUMN_INDEX for a user-defined column. When no column
 *         matches, assert(0) fires and -1 is returned if asserts are disabled.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    for (int32_t idx = 0; idx < pQueryMsg->numOfTags; ++idx) {
      if (pExprMsg->colInfo.colId == pTagCols[idx].colId) {
        return idx;
      }
    }
  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    for (int32_t idx = 0; idx < pQueryMsg->numOfCols; ++idx) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[idx].colId) {
        return idx;
      }
    }
  }

  // the referenced column is expected to exist in one of the lists
  assert(0);
  return -1;
}

5955 5956 5957
/*
 * Check that the column index resolved for an expression is below the number
 * of normal columns or below the number of tag columns of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t colIdx = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (colIdx < pQueryMsg->numOfCols) {
    return true;
  }

  return colIdx < pQueryMsg->numOfTags;
}

5960
/*
 * Sanity-check the scalar header fields of a query message.
 *
 * Rejects (and logs) a negative interval, a non-positive table count, a
 * negative group-by column count, and an output column count outside
 * (0, TSDB_MAX_COLUMNS].
 *
 * @return true when every check passes, false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool intervalOk = !(pQueryMsg->interval.interval < 0);
  if (!intervalOk) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
    return false;
  }

  bool tablesOk = !(pQueryMsg->numOfTables <= 0);
  if (!tablesOk) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  bool groupColsOk = !(pQueryMsg->numOfGroupCols < 0);
  if (!groupColsOk) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  bool outputOk = !(pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS);
  if (!outputOk) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/*
 * Validate the number of source columns (normal columns + tags) of a query.
 *
 * Negative counts or a total above TSDB_MAX_COLUMNS are rejected. A query with
 * no source column at all is accepted only if every output expression is a tag
 * projection, or a TID_TAG / COUNT function applied to the table-name column.
 *
 * @param pQueryMsg  query message (numOfCols, numOfTags, numOfOutput are read)
 * @param pExprMsg   array of numOfOutput output expressions
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source columns: only expressions that need none are acceptable
  for (int32_t k = 0; k < pQueryMsg->numOfOutput; ++k) {
    SSqlFuncMsg* pFunc = pExprMsg[k];

    bool onTbname    = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool isTagPrj    = (pFunc->functionId == TSDB_FUNC_TAGPRJ);
    bool isTidTag    = (pFunc->functionId == TSDB_FUNC_TID_TAG) && onTbname;
    bool isCountName = (pFunc->functionId == TSDB_FUNC_COUNT) && onTbname;

    if (!(isTagPrj || isTidTag || isCountName)) {
      return false;
    }
  }

  return true;
}

6006
/*
 * Build the table id list from the STableIdInfo records stored at pMsg.
 *
 * Each of the pQueryMsg->numOfTables records is byte-swapped in place
 * (htonl for tid, htobe64 for uid and key) and appended to a newly created
 * array that is stored in *pTableIdList.
 *
 * @return pointer to the first byte after the last STableIdInfo record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  char *cursor = pMsg;
  for (int32_t remaining = pQueryMsg->numOfTables; remaining > 0; --remaining) {
    STableIdInfo* pEntry = (STableIdInfo *)cursor;

    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
    cursor += sizeof(STableIdInfo);
  }

  return cursor;
}
6024

6025
/**
H
hjxilinx 已提交
6026
 * pQueryMsg->head has been converted before this function is called.
6027
 *
H
hjxilinx 已提交
6028
 * @param pQueryMsg
6029 6030 6031 6032
 * @param pTableIdList
 * @param pExpr
 * @return
 */
H
Haojun Liao 已提交
6033
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr, SSqlFuncMsg ***pSecStageExpr,
weixin_48148422's avatar
weixin_48148422 已提交
6034
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
6035 6036
  int32_t code = TSDB_CODE_SUCCESS;

6037 6038 6039 6040
  if (taosCheckVersion(pQueryMsg->version, version, 3) != 0) {
    return TSDB_CODE_QRY_INVALID_MSG;
  }

6041 6042 6043 6044
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
6045 6046 6047
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
6048 6049
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
Haojun Liao 已提交
6050
  pQueryMsg->vgroupLimit = htobe64(pQueryMsg->vgroupLimit);
H
hjxilinx 已提交
6051

6052 6053
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
6054
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
6055
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
6056 6057

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
6058
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
6059
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
6060 6061 6062
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
6063
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
6064
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
6065
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6066
  pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput);
6067

6068
  // query msg safety check
6069
  if (!validateQueryMsg(pQueryMsg)) {
6070 6071
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
6072 6073
  }

H
hjxilinx 已提交
6074 6075
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
6076 6077
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
6078
    pColInfo->colId = htons(pColInfo->colId);
6079
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
6080 6081
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
6082

H
hjxilinx 已提交
6083
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
6084

H
hjxilinx 已提交
6085
    int32_t numOfFilters = pColInfo->numOfFilters;
6086
    if (numOfFilters > 0) {
H
hjxilinx 已提交
6087
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
6088 6089 6090 6091
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
6092 6093 6094
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
6095
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
6096

6097 6098
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
6099 6100 6101

      pMsg += sizeof(SColumnFilterInfo);

6102 6103
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
6104

6105
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
6106 6107 6108 6109 6110
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

6111
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
6112
        pMsg += (pColFilter->len + 1);
6113
      } else {
6114 6115
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
6116 6117
      }

6118 6119
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
6120 6121 6122
    }
  }

6123
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
6124 6125 6126 6127 6128
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

6129
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
6130

6131
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6132
    (*pExpr)[i] = pExprMsg;
6133

6134
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
6135
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
H
Haojun Liao 已提交
6136 6137 6138 6139
    pExprMsg->colInfo.flag  = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId    = htons(pExprMsg->functionId);
    pExprMsg->numOfParams   = htons(pExprMsg->numOfParams);
    pExprMsg->resColId      = htons(pExprMsg->resColId);
6140

6141
    pMsg += sizeof(SSqlFuncMsg);
6142 6143

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
6144
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
6145 6146 6147 6148
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
6149
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
6150 6151 6152 6153 6154
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
6155 6156
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
6157
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
6158 6159
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
6160 6161
      }
    } else {
6162
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
6163
//        return TSDB_CODE_QRY_INVALID_MSG;
6164
//      }
6165 6166
    }

6167
    pExprMsg = (SSqlFuncMsg *)pMsg;
6168
  }
6169

6170
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
6171
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
6172
    goto _cleanup;
6173
  }
6174

H
Haojun Liao 已提交
6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217
  if (pQueryMsg->secondStageOutput) {
    pExprMsg = (SSqlFuncMsg *)pMsg;
    *pSecStageExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES);
    
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      (*pSecStageExpr)[i] = pExprMsg;

      pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
      pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
      pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
      pExprMsg->functionId = htons(pExprMsg->functionId);
      pExprMsg->numOfParams = htons(pExprMsg->numOfParams);

      pMsg += sizeof(SSqlFuncMsg);

      for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
        pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
        pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

        if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
          pExprMsg->arg[j].argValue.pz = pMsg;
          pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
        } else {
          pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
        }
      }

      int16_t functionId = pExprMsg->functionId;
      if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
        if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
          code = TSDB_CODE_QRY_INVALID_MSG;
          goto _cleanup;
        }
      } else {
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_QRY_INVALID_MSG;
//      }
      }

      pExprMsg = (SSqlFuncMsg *)pMsg;
    }
  }

H
hjxilinx 已提交
6218
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
6219

H
hjxilinx 已提交
6220
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
6221
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
6222 6223 6224 6225
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
6226 6227

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
D
fix bug  
dapan1121 已提交
6228
      (*groupbyCols)[i].colId = htons(*(int16_t *)pMsg);
6229
      pMsg += sizeof((*groupbyCols)[i].colId);
6230

D
fix bug  
dapan1121 已提交
6231
      (*groupbyCols)[i].colIndex = htons(*(int16_t *)pMsg);
6232 6233
      pMsg += sizeof((*groupbyCols)[i].colIndex);

D
fix bug  
dapan1121 已提交
6234
      (*groupbyCols)[i].flag = htons(*(int16_t *)pMsg);
6235 6236 6237 6238 6239
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
6240

H
hjxilinx 已提交
6241 6242
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
6243 6244
  }

6245 6246
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
6247
    pQueryMsg->fillVal = (uint64_t)(pMsg);
6248 6249

    int64_t *v = (int64_t *)pMsg;
6250
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6251 6252
      v[i] = htobe64(v[i]);
    }
6253

6254
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
6255
  }
6256

6257 6258
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6259 6260 6261 6262 6263
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

6264 6265
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
6266

6267 6268 6269 6270
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
6271

6272
      (*tagCols)[i] = *pTagCol;
6273
      pMsg += sizeof(SColumnInfo);
6274
    }
H
hjxilinx 已提交
6275
  }
6276

6277 6278 6279
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
6280 6281 6282 6283 6284 6285

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
6286 6287 6288
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
6289

weixin_48148422's avatar
weixin_48148422 已提交
6290
  if (*pMsg != 0) {
6291
    size_t len = strlen(pMsg) + 1;
6292

6293
    *tbnameCond = malloc(len);
6294 6295 6296 6297 6298
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
6299
    strcpy(*tbnameCond, pMsg);
6300
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
6301
  }
6302

6303
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
6304 6305
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
6306
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
6307
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
6308 6309

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
6310 6311

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
6312
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
6313 6314
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
6315 6316 6317 6318
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
6319 6320

  return code;
6321 6322
}

H
Haojun Liao 已提交
6323 6324
/*
 * Build the arithmetic expression tree of pArithExprInfo from its serialized
 * form, which is carried in the first argument (arg[0]) of the expression.
 *
 * @return TSDB_CODE_SUCCESS with pArithExprInfo->pExpr set, the code caught
 *         from exprTreeFromBinary() when it raises, or TSDB_CODE_QRY_APP_ERROR
 *         when no tree is produced
 */
static int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary", pQueryMsg);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
6344
static int32_t createQueryFuncExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
6345 6346
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
6347
  int32_t code = TSDB_CODE_SUCCESS;
6348

H
Haojun Liao 已提交
6349
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
6350
  if (pExprs == NULL) {
6351
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
6352 6353 6354 6355 6356
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

H
Haojun Liao 已提交
6357
  for (int32_t i = 0; i < numOfOutput; ++i) {
6358
    pExprs[i].base = *pExprMsg[i];
6359
    pExprs[i].bytes = 0;
6360 6361 6362 6363

    int16_t type = 0;
    int16_t bytes = 0;

6364
    // parse the arithmetic expression
6365
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
6366
      code = buildArithmeticExprFromMsg(&pExprs[i], pQueryMsg);
6367

6368
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6369
        tfree(pExprs);
6370
        return code;
6371 6372
      }

6373
      type  = TSDB_DATA_TYPE_DOUBLE;
6374
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
6375
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
6376
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6377
      type = s.type;
H
Haojun Liao 已提交
6378
      bytes = s.bytes;
6379 6380
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
6381 6382
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6383 6384
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6385 6386 6387 6388 6389

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6390
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
6391
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6392

dengyihao's avatar
dengyihao 已提交
6393
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6394 6395 6396 6397
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6398
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6399

H
Haojun Liao 已提交
6400 6401 6402
        type  = s.type;
        bytes = s.bytes;
      }
6403 6404
    }

S
TD-1057  
Shengliang Guan 已提交
6405
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6406
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6407
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6408
      tfree(pExprs);
6409
      return TSDB_CODE_QRY_INVALID_MSG;
6410 6411
    }

6412
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6413
      tagLen += pExprs[i].bytes;
6414
    }
6415
    assert(isValidDataType(pExprs[i].type));
6416 6417 6418
  }

  // TODO refactor
H
Haojun Liao 已提交
6419
  for (int32_t i = 0; i < numOfOutput; ++i) {
6420 6421
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6422

6423
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6424
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6425 6426 6427 6428 6429 6430 6431 6432 6433
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6434 6435 6436
    }
  }

6437
  *pExprInfo = pExprs;
6438 6439 6440
  return TSDB_CODE_SUCCESS;
}

6441
/*
 * Create the group-by expression descriptor from the query message.
 *
 * @param pQueryMsg  query message (numOfGroupCols, orderType, orderByIdx are read)
 * @param pColIndex  array of numOfGroupCols group-by column descriptors; copied
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails,
 *                   left untouched otherwise
 * @return the new descriptor, or NULL when there is no group-by column or on
 *         allocation failure
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the calloc above
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6465
/*
 * Build pQuery->pFilterInfo from the per-column filters in pQuery->colList.
 *
 * Counts the columns that carry filters (adding to pQuery->numOfFilterCols),
 * allocates one SSingleColumnFilterInfo per such column and binds a filter
 * function to every filter element according to its lower/upper relation.
 *
 * @param pQInfo  query handle, used only for logging
 * @param pQuery  query whose colList is read and whose filter info is created
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY or TSDB_CODE_QRY_INVALID_MSG
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  // i walks all columns, j walks only the columns that have filters
  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      // a single struct copy is enough (the former memcpy duplicated it)
      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // two-sided range: slots 1..4 are selected by (>, <), (>=, <), (>, <=), (>=, <=)
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            // a valid lower relation combined with any upper relation is rejected here
            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

6558
/**
 * Resolve the colIndex of every output expression of the query.
 *
 * For each expression in pQuery->pExpr1 (arithmetic expressions are skipped) the
 * referenced column id is looked up either in pQuery->colList (normal columns) or in
 * pQuery->tagColList (tag columns), and the position found is stored in colInfo.colIndex.
 * User-defined constant columns (colId <= TSDB_UD_COLUMN_INDEX) are left untouched.
 *
 * @param pQuery query whose expression list is updated in place; must not be NULL
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before it is dereferenced
  assert(pQuery != NULL && pQuery->pExpr1 != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pExpr1[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column referenced by an expression must exist in the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is the only id allowed to be absent from the tag list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6595 6596
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6597 6598 6599
/**
 * Decide how many rows the output buffer of a query holds (rec.capacity) and the row
 * count at which the buffer is considered full enough (rec.threshold, 85% of capacity).
 *
 * Projection queries size the buffer from the row width, but never below
 * RESULT_MSG_MIN_ROWS rows; every other query uses a fixed 4096-row buffer.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  // in case of non-prj query, a smaller output buffer will be used.
  int32_t rows = 4096;

  if (isProjQuery(pQuery)) {
    rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    rows = (rows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : rows;
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

6616
/**
 * Allocate and populate a SQInfo (and its embedded SQuery) from a decoded query message.
 *
 * The expression arrays, group-by expression, tag column list and table group passed in
 * are attached to the new object; on every failure path they are released here (directly
 * or through freeQInfo()), so the caller must not free them again after this call.
 *
 * @param pQueryMsg        decoded query message
 * @param pGroupbyExpr     group-by expression, may be NULL
 * @param pExprs           first-stage output expressions (pQueryMsg->numOfOutput entries)
 * @param pSecExprs        second-stage output expressions, may be NULL
 * @param pTableGroupInfo  tables to query, grouped; copied by value into the new SQInfo
 * @param pTagCols         tag column descriptors
 * @param stableQuery      forwarded to changeExecuteScanOrder()
 * @return the new SQInfo, or NULL if any allocation or initialization step failed
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pExpr1          = pExprs;
  pQuery->pExpr2          = pSecExprs;
  pQuery->numOfExpr2      = pQueryMsg->secondStageOutput;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;
  pQuery->prjInfo.vgroupLimit = pQueryMsg->vgroupLimit;
  pQuery->prjInfo.ts      = (pQueryMsg->order == TSDB_ORDER_ASC)? INT64_MIN:INT64_MAX;

  // size the array by its own element type rather than by an unrelated struct
  pQuery->colList = calloc(numOfCols, sizeof(*pQuery->colList));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  // copy the column descriptors (deep-copying their filters) and sum up the source row width
  int32_t srcSize = 0;
  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
    srcSize += pQuery->colList[i].bytes;
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    // TODO refactor
    int16_t bytes = 0;
    // pExpr2 holds numOfExpr2 entries, so index numOfExpr2 itself is already out of range
    if (pQuery->pExpr2 == NULL || col >= pQuery->numOfExpr2) {
      bytes = pExprs[col].bytes;
    } else {
      bytes = MAX(pQuery->pExpr2[col].bytes, pExprs[col].bytes);
    }

    size_t size = (size_t)((pQuery->rec.capacity + 1) * bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  pQInfo->runtimeEnv.pool = initResultRowPool(getResultRowSize(&pQInfo->runtimeEnv));

  // prevRow is a single block: numOfCols pointer slots followed by srcSize bytes of row data
  pQInfo->runtimeEnv.prevRow = malloc(POINTER_BYTES * pQuery->numOfCols + srcSize);
  if (pQInfo->runtimeEnv.keyBuf == NULL || pQInfo->runtimeEnv.prevRow == NULL) {
    goto _cleanup;
  }

  // with no columns there is no pointer slot to fill, writing prevRow[0] would overrun the block
  if (pQuery->numOfCols > 0) {
    char* start = POINTER_BYTES * pQuery->numOfCols + (char*) pQInfo->runtimeEnv.prevRow;
    pQInfo->runtimeEnv.prevRow[0] = start;

    for(int32_t i = 1; i < pQuery->numOfCols; ++i) {
      pQInfo->runtimeEnv.prevRow[i] = pQInfo->runtimeEnv.prevRow[i - 1] + pQuery->colList[i-1].bytes;
    }
  }

  // one contiguous buffer that backs every STableQueryInfo created below
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  pQInfo->runtimeEnv.queryWindowIdentical = true;
  STimeWindow window = pQuery->window;

  // build one STableQueryInfo per table, keeping the grouping of the input table list
  int32_t index = 0;
  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      // each table starts from its own last key; remember whether all windows coincide
      window.skey = info->lastKey;
      if (info->lastKey != pQuery->window.skey) {
        pQInfo->runtimeEnv.queryWindowIdentical = false;
      }

      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }

  colIdCheck(pQuery);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

  // the three labels below fall through into each other on purpose
_cleanup_qinfo:
  // pQInfo was never allocated, so the table group has no owner yet
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // pQuery was never allocated, so the inputs were not attached to it and are released by hand
  // NOTE(review): pSecExprs does not appear to be released on this path - confirm and fix separately
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

_cleanup:
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6822
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6823 6824 6825 6826
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6827

H
hjxilinx 已提交
6828 6829 6830 6831
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6832
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6833 6834 6835
  return (sig == (uint64_t)pQInfo);
}

6836
/**
 * Finish the initialization of a freshly created SQInfo so that it is ready to execute.
 *
 * Returns early with success (and the query marked QUERY_COMPLETED) when the time window
 * is empty for the scan order or when no table qualifies. Otherwise the optional timestamp
 * buffer carried by the message is decoded and handed to doInitQInfo().
 *
 * @param pQueryMsg query message; tsLen/tsOffset/tsNumOfBlocks/tsOrder describe the ts buffer
 * @param tsdb      storage handle, also the source of the timestamp precision
 * @param vgId      vnode group id, forwarded to the ts buffer and doInitQInfo()
 * @param pQInfo    query handle to initialize; it is freed here if doInitQInfo() fails
 * @param isSTable  forwarded to doInitQInfo()
 * @return TSDB_CODE_SUCCESS or the error code reported by doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // Build the ts buffer only once the early exits above are behind us: nothing on those
  // paths takes the buffer, so creating it earlier would leak it.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);

    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6881
/**
 * Release an array of column filters together with the string payload of every
 * string-typed filter (those with filterstr set own the buffer addressed by pz).
 *
 * @param pFilter      filter array, may be NULL
 * @param numOfFilters number of entries in pFilter; nothing is done when it is 0
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter == NULL || numOfFilters == 0) {
    return;
  }

  int32_t idx = 0;
  while (idx < numOfFilters) {
    SColumnFilterInfo *entry = &pFilter[idx];
    if (entry->filterstr) {
      free((void*)(entry->pz));
    }

    idx++;
  }

  free(pFilter);
}

H
Haojun Liao 已提交
6895 6896
/**
 * Destroy every STableQueryInfo held by a table group, then the per-group arrays,
 * the group list and the lookup map, and reset the structure to an empty state.
 *
 * @param pTableqinfoGroupInfo group info to tear down; the struct itself is not freed
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray *groups = pTableqinfoGroupInfo->pGroupList;

  if (groups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(groups);

    int32_t g = 0;
    while (g < groupCount) {
      SArray *members = taosArrayGetP(groups, g);
      size_t  memberCount = taosArrayGetSize(members);

      int32_t m = 0;
      while (m < memberCount) {
        STableQueryInfo* entry = taosArrayGetP(members, m);
        destroyTableQueryInfoImpl(entry);
        ++m;
      }

      taosArrayDestroy(members);
      ++g;
    }
  }

  taosArrayDestroy(groups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  // leave nothing dangling behind
  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
Haojun Liao 已提交
6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934
/**
 * Destroy an array of expression descriptors, including the expression node attached
 * to each entry.
 *
 * @param pExprInfo array to destroy; when NULL, numOfExpr is asserted to be 0
 * @param numOfExpr number of entries in pExprInfo
 * @return always NULL, so the result can be assigned back to the owning pointer
 */
static void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr) {
  if (pExprInfo != NULL) {
    int32_t n = 0;
    while (n < numOfExpr) {
      if (pExprInfo[n].pExpr != NULL) {
        tExprNodeDestroy(pExprInfo[n].pExpr, NULL);
      }

      n += 1;
    }

    tfree(pExprInfo);
  } else {
    assert(numOfExpr == 0);
  }

  return NULL;
}

H
hjxilinx 已提交
6935 6936 6937 6938
/**
 * Release a query handle and everything it owns.
 *
 * Handles that fail isValidQInfo() (NULL or signature mismatch) are ignored. Otherwise the
 * query buffer quota is returned, the runtime environment is torn down, the SQuery and its
 * members are freed, followed by the per-table query info, the table group, the table-id
 * hash and the group result rows. The signature is cleared before the handle is freed.
 *
 * @param pQInfo query handle to destroy
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    // per-column output buffers, then the array that holds them
    if (pQuery->sdata != NULL) {
      int32_t c = 0;
      while (c < pQuery->numOfOutput) {
        tfree(pQuery->sdata[c]);
        ++c;
      }

      tfree(pQuery->sdata);
    }

    if (pQuery->fillVal != NULL) {
      tfree(pQuery->fillVal);
    }

    // filter callbacks attached to each filtered column
    for (int32_t n = 0; n < pQuery->numOfFilterCols; ++n) {
      SSingleColumnFilterInfo *pFilterCol = pQuery->pFilterInfo + n;
      if (pFilterCol->numOfFilters > 0) {
        tfree(pFilterCol->pFilters);
      }
    }

    pQuery->pExpr1 = destroyQueryFuncExpr(pQuery->pExpr1, pQuery->numOfOutput);
    pQuery->pExpr2 = destroyQueryFuncExpr(pQuery->pExpr2, pQuery->numOfExpr2);

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    // source column descriptors and the filters cloned into them
    if (pQuery->colList != NULL) {
      for (int32_t n = 0; n < pQuery->numOfCols; n++) {
        SColumnInfo *pCol = &pQuery->colList[n];
        freeColumnFilterInfo(pCol->filters, pCol->numOfFilters);
      }

      tfree(pQuery->colList);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosHashCleanup(pQInfo->arrTableIdInfo);

  taosArrayDestroy(pQInfo->groupResInfo.pRows);

  // invalidate the handle so that later isValidQInfo() checks on it fail
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
7003
/**
 * Compute the number of payload bytes to send back for the current result.
 *
 * For ordinary queries this is rowSize * (*numOfRows). For a ts-comp query with a non-empty
 * result, the payload is a file whose path is stored in sdata[0]->data: its size is returned
 * and also written back through numOfRows.
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo    query handle
 * @param numOfRows in: number of result rows; out: file size for ts-comp queries
 * @return payload size in bytes, or 0 if the ts-comp file cannot be stat'ed
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
7024

H
hjxilinx 已提交
7025 7026 7027
/**
 * Copy the current query result into the response buffer and update the row accounting.
 *
 * For a ts-comp query the result is a temporary file whose path is stored in
 * sdata[0]->data: the file is read into data, closed and unlinked. For every other query
 * the rows are copied with doCopyQueryResultToMsg(). Afterwards rec.total is advanced by
 * rec.rows and the query is marked QUERY_OVER once the LIMIT has been reached.
 *
 * @param pQInfo query handle
 * @param data   destination buffer inside the response message
 * @return always TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the results signed: lseek()/read() report failure as -1, which an unsigned
      // variable would turn into a huge "size" and hide from the checks below
      int64_t s = (int64_t) lseek(fd, 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%s, size:%"PRId64, pQInfo, pQuery->sdata[0]->data, s);
      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t sz = (int64_t) read(fd, data, (uint32_t) s);
        if (sz < s) {  // todo handle error; also true when read() failed with -1
          assert(0);
        }
      } else {
        // the size is unknown or the file cannot be rewound: do not attempt to read
        qError("QInfo:%p failed to seek tmp file for ts-comp data, path:%s, reason:%s", pQInfo,
               pQuery->sdata[0]->data, strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

7077 7078 7079 7080 7081 7082 7083
/*
 * Bookkeeping for the query handles that are kept in a cache.
 * NOTE(review): the code using this struct is outside this section; the field notes
 * below other than qinfoPool are inferred from names and types only - confirm.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vnode group served by this manager
  bool            closed;         // presumably set once the manager stops accepting new handles
  pthread_mutex_t lock;           // presumably guards the fields above
} SQueryMgmt;

7084
/**
 * Build an executable query handle from a query message.
 *
 * The message is decoded, the output / second-stage / group-by expressions are created,
 * the set of tables to query is resolved from the storage engine, the query buffer quota
 * is checked, and finally the SQInfo is created and initialized.
 *
 * @param tsdb      storage engine handle, must not be NULL
 * @param vgId      vnode group id
 * @param pQueryMsg query message, must not be NULL; the filters of its column list are
 *                  released before returning
 * @param pQInfo    out: the new query handle on success, NULL on any failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond      = NULL;
  char            *tbnameCond   = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg     = NULL;
  SSqlFuncMsg    **pSecExprMsg  = NULL;
  SExprInfo       *pExprs       = NULL;
  SExprInfo       *pSecExprs    = NULL;

  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &pSecExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->numOfOutput, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pSecExprMsg != NULL) {
    if ((code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &pSecExprs, pSecExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    assert(0);
  }

  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    // the table group was populated above but has not been handed to a SQInfo yet,
    // so it must be released here or it is lost
    tsdbDestroyTableGroup(&tableGroupInfo);
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, pSecExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery);

  // createQInfoImpl() has taken over (or already released) these objects; drop the local
  // references so the common cleanup below does not free them a second time
  pExprs = NULL;
  pSecExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);
  free(pExprs);
  free(pSecExprs);

  free(pExprMsg);
  free(pSecExprMsg);

  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // on failure the handle has already been freed (e.g. inside initQInfo), but *pQInfo may
  // still hold the stale address; make sure the caller never sees it
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
7232
/**
 * Destroy a query handle: print its cost summary and release it.
 * Handles that fail isValidQInfo() are ignored.
 *
 * @param qHandle query handle to destroy
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);

    // print the query cost summary
    queryCostStatis(pQInfo);
    freeQInfo(pQInfo);
  }
}

7243 7244 7245 7246 7247 7248
/**
 * Mark the result of the current execution round as ready and give up ownership of the
 * query handle.
 *
 * @param pQInfo query handle currently owned by the calling thread
 * @return the value of needBuildResAfterQueryComplete() evaluated under the handle lock
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool needBuild = needBuildResAfterQueryComplete(pQInfo);

  // The owner must be cleared inside the critical section: once the handle is put back
  // into the task queue, another thread may start running it before this one is done.
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  // used in retrieve blocking model.
  tsem_post(&pQInfo->ready);

  return needBuild;
}

7263
/**
 * Execute one round of the query bound to the given handle.
 *
 * The calling thread first claims the handle by swapping its thread id into
 * pQInfo->owner. If another thread already owns it, the error code
 * TSDB_CODE_QRY_IN_EXEC is recorded and false is returned. Every other exit
 * path goes through doBuildResCheck(), which clears the owner again.
 *
 * @param qinfo opaque query handle (an SQInfo*); must be non-NULL and carry a valid signature
 * @return false if the handle is being executed by another thread, otherwise
 *         the value returned by doBuildResCheck()
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  // claim the handle for the current thread
  int64_t self      = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  pQInfo->startExecTs = taosGetTimestampSec();

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // A non-zero value means control came back here through the jump buffer:
  // record it as the query's error code and hand the result back to the client.
  int32_t jmpCode = setjmp(pRuntimeEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // pick the executor that matches the kind of query
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

7321
/**
 * Check whether the result of a query can be handed to the client now.
 *
 * In the blocking model (tsRetrieveBlockingModel set) the call waits on the
 * `ready` semaphore and always reports *buildRes = true. Otherwise the ready
 * flag is inspected under pQInfo->lock. If the result is not ready yet, the
 * response context is stored on the handle so the result can be returned
 * once the query pauses.
 *
 * @param qinfo       opaque query handle (an SQInfo*)
 * @param buildRes    out: true if the result is ready to be built; not written
 *                    when the handle is invalid
 * @param pRspContext response context remembered on the handle while waiting
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the
 *         code currently stored in the handle
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // blocking model: wait until the executing thread posts the semaphore
  if (tsRetrieveBlockingModel) {
    pQInfo->rspContext = pRspContext;
    tsem_wait(&pQInfo->ready);
    *buildRes = true;
    return pQInfo->code;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);

  assert(pQInfo->rspContext == NULL);

  bool ready = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = ready;

  if (ready) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%s", pQInfo, pQuery->rowSize,
           pQuery->rec.rows, tstrerror(pQInfo->code));
  } else {
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  }

  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
7365

7366
/**
 * Build the retrieve response message for the current result of a query.
 *
 * The response buffer is obtained from rpcMallocCont(). Its size is the
 * result size reported by getResultSize(), plus one int32_t, plus one
 * STableIdInfo per entry of pQInfo->arrTableIdInfo, plus the response header.
 *
 * @param qinfo        opaque query handle (an SQInfo*)
 * @param pRsp         out: the allocated response, NULL when allocation fails
 * @param contLen      out: length of the response in bytes, 0 when allocation fails
 * @param continueExec out: true if the query has more results to produce;
 *                     always false on an error return
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the response cannot be allocated,
 *         otherwise the code currently stored in the handle
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // never leave the out flag undefined on an early error return
  *continueExec = false;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosHashGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    *contLen = 0;  // do not report a length for a buffer that does not exist
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query; the elapsed time always is
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
  } else {
    (*pRsp)->offset = 0;
  }
  (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    // here current thread hold the refcount, so it is safe to free tsdbQueryHandle.
    *continueExec = false;
    (*pRsp)->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results to retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
7420

7421 7422 7423 7424 7425 7426 7427 7428
/**
 * Tell whether a query has finished.
 *
 * @param qinfo opaque query handle (an SQInfo*)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1 if
 *         the query is killed or its status has QUERY_OVER set, else 0
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (isQueryKilled(pQInfo)) {
    return 1;
  }

  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    return 1;
  }

  return 0;
}

H
Haojun Liao 已提交
7432
/**
 * Mark a query as killed and wait until no thread owns its handle any more.
 *
 * @param qinfo opaque query handle (an SQInfo*)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise
 *         TSDB_CODE_SUCCESS once pQInfo->owner has been observed as 0
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);

  // Wait for the executing thread to stop: the owner of the handle is cleared
  // as soon as the query stops. Poll every 100 ms.
  for (;;) {
    if (pQInfo->owner == 0) {
      break;
    }

    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465
/**
 * Write one tag value into a result buffer slot.
 *
 * BINARY/NCHAR values are copied with their var-data total length
 * (varDataTLen); all other types are copied as `bytes` raw bytes. A NULL
 * `val` is written as the type specific NULL representation.
 *
 * @param output destination slot in the result buffer
 * @param val    source value, or NULL
 * @param type   TSDB data type of the value
 * @param bytes  width used for non var-length types
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarType) {
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7466 7467 7468
/**
 * Produce the result of a query that touches only tags / table meta data.
 *
 * Three kinds of queries are handled, selected by the function id of the
 * first output expression:
 *  - TSDB_FUNC_TID_TAG: one var-data row per table holding a zero int16,
 *    the table uid, the table tid, the vgId and the table name or tag value;
 *  - TSDB_FUNC_COUNT:   a single row holding the number of tables;
 *  - anything else:     one row per table with the requested tag columns /
 *    table name, honouring limit and offset.
 *
 * pQInfo->tableIndex is advanced for every table consumed. On return
 * pQuery->rec.rows holds the number of rows produced and the query status is
 * set to QUERY_COMPLETED. Nothing is done when there is no table group.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pExpr1[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pExpr1[0];
    int32_t rsize = pExprInfo->bytes;

    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    // prefer the width/type declared in the tag column list, if the column is found there
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // `output` is an arbitrary byte offset inside the row buffer, so the
      // fixed-width fields are serialized with memcpy instead of being stored
      // through casted pointers, which could be misaligned.
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // at most one buffer worth of rows, or fewer if the limit is smaller
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pExpr1;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7599
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7600 7601 7602 7603
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7604
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7605 7606
}

H
Haojun Liao 已提交
7607
/**
 * Reserve query buffer budget for a query over `numOfTables` tables.
 *
 * tsQueryBufferSize < 0 means no accounting is done and the call always
 * succeeds; a value of 0 rejects every query. For a positive budget the
 * required amount is subtracted with a compare-and-swap retry loop.
 *
 * @param numOfTables number of tables involved in the query
 * @return TSDB_CODE_SUCCESS if the budget was reserved (or is not enforced),
 *         TSDB_CODE_QRY_NOT_ENOUGH_BUFFER otherwise
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  if (tsQueryBufferSize > 0) {
    for (;;) {
      int64_t avail = tsQueryBufferSize;
      int64_t left  = avail - required;

      if (left < 0) {
        return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
      }

      // retry if another thread changed the budget in the meantime
      if (atomic_val_compare_exchange_64(&tsQueryBufferSize, avail, left) == avail) {
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
}

H
Haojun Liao 已提交
7630
/**
 * Give back the query buffer budget reserved by checkForQueryBuf().
 *
 * Nothing is done when accounting is not enforced (tsQueryBufferSize < 0).
 * A budget of exactly 0 must still be refunded: checkForQueryBuf() accepts a
 * reservation that leaves 0 bytes, so skipping the refund in that state would
 * leave the budget at 0 and reject every later query.
 *
 * @param numOfTables number of tables the reservation was made for
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize < 0) {
    return;
  }

  int64_t t = getQuerySupportBufSize(numOfTables);

  // return the reserved amount to the shared budget
  atomic_add_fetch_64(&tsQueryBufferSize, t);
}

7641 7642 7643 7644 7645 7646 7647
/**
 * Return the response context currently stored on a query handle.
 *
 * @param qinfo opaque query handle (an SQInfo*); must not be NULL
 * @return the value of pQInfo->rspContext
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* pContext = pInfo->rspContext;
  return pContext;
}

7648 7649 7650 7651 7652 7653 7654
/**
 * Release callback for the query handle cache (passed to taosCacheInit() in
 * qOpenQueryMgmt()): kill the query and destroy its handle.
 *
 * @param qhandle pointer to the stored query handle (a void**); a NULL
 *                pointer or a NULL stored handle is ignored
 */
void freeqinfoFn(void *qhandle) {
  void** ppHandle = qhandle;

  if (ppHandle != NULL && *ppHandle != NULL) {
    qKillQuery(*ppHandle);
    qDestroyQueryInfo(*ppHandle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7659
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7660 7661 7662 7663

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7664
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7665 7666 7667 7668
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7669

S
TD-1530  
Shengliang Guan 已提交
7670
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7671 7672 7673 7674
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7675 7676

  qDebug("vgId:%d, open querymgmt success", vgId);
7677
  return pQueryMgmt;
7678 7679
}

H
Haojun Liao 已提交
7680
/**
 * Cache traversal callback used by qQueryMgmtNotifyClosed(): kill the query
 * stored in a cache entry.
 *
 * @param handle pointer to the stored query handle (a void**); a NULL
 *               pointer or a NULL stored handle is ignored, matching
 *               freeqinfoFn()
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL || *fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7686 7687 7688 7689 7690 7691 7692
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

7693
  pthread_mutex_lock(&pQueryMgmt->lock);
7694
  pQueryMgmt->closed = true;
7695
  pthread_mutex_unlock(&pQueryMgmt->lock);
7696

H
Haojun Liao 已提交
7697
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714
}

/**
 * Destroy a query manager that has already been marked as closed.
 *
 * The handle cache is detached from the manager before it is cleaned up, then
 * the lock is destroyed and the manager itself is freed.
 *
 * @param pQMgmt query manager (an SQueryMgmt*); NULL is ignored
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;

  if (pQueryMgmt != NULL) {
    // remember the id: the manager is gone by the time the final log line is written
    int32_t vgId = pQueryMgmt->vgId;

    assert(pQueryMgmt->closed);

    SCacheObj* pPool = pQueryMgmt->qinfoPool;
    pQueryMgmt->qinfoPool = NULL;
    taosCacheCleanup(pPool);

    pthread_mutex_destroy(&pQueryMgmt->lock);
    tfree(pQueryMgmt);

    qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
  }
}

7720
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7721
  if (pMgmt == NULL) {
7722
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7723 7724 7725 7726 7727
    return NULL;
  }

  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7728
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7729
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7730 7731 7732
    return NULL;
  }

7733
  pthread_mutex_lock(&pQueryMgmt->lock);
7734
  if (pQueryMgmt->closed) {
7735
    pthread_mutex_unlock(&pQueryMgmt->lock);
7736
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7737
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7738 7739
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7740
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
7741 7742
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE),
        (getMaximumIdleDurationSec()*1000));
7743
    pthread_mutex_unlock(&pQueryMgmt->lock);
7744 7745 7746 7747 7748

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7749
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7750 7751
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7752 7753 7754 7755 7756 7757 7758
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7759 7760 7761
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7762 7763
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7764
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7765
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7766 7767 7768 7769 7770 7771
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7772
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7773 7774 7775 7776 7777
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7778
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7779
  return 0;
7780
}