qExecutor.c 268.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29
#include "tlosertree.h"
30
#include "ttype.h"
Y
yihaoDeng 已提交
31
#include "tcompare.h"
32

H
Haojun Liao 已提交
33
// upper bound on rows per result-buffer page: 2^12 - 1 = 4095 (unsigned constant)
#define MAX_ROWS_PER_RESBUF_PAGE  ((1u<<12) - 1)
34 35 36 37 38

/**
 * Check if the primary column is loaded by default; otherwise, the program is
 * forced to load the primary column explicitly.
 */
H
Haojun Liao 已提交
39
// true when p and s share at least one set bit (a bit-overlap test, not strict equality)
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0u)
40 41
// true when the direction factor derived from q->order.order equals QUERY_ASC_FORWARD_STEP
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

42
// true when runtime->scanFlag is MASTER_SCAN
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
43
// true when runtime->scanFlag is REVERSE_SCAN
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
44
// assigns MASTER_SCAN to runtime->scanFlag; the expression yields the assigned value
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
45
// assigns REVERSE_SCAN to runtime->scanFlag; the expression yields the assigned value
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
46

H
Haojun Liao 已提交
47
// recover the enclosing SQInfo from a pointer to its runtimeEnv member (x must point at SQInfo.runtimeEnv)
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
48

49
// position reached from query->pos after moving `index` elements of size/direction `step`
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
50
// toggle n in place: TSDB_ORDER_ASC becomes TSDB_ORDER_DESC, anything else becomes TSDB_ORDER_ASC.
// n is expanded twice, so it must be a side-effect-free lvalue.
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
51

H
Haojun Liao 已提交
52 53
// compound literal producing an SDataBlockInfo whose listed leading members are zero
// (remaining members are zero-initialized by the language rules)
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

S
TD-1057  
Shengliang Guan 已提交
54
/* Copy both boundaries (skey, ekey) of _src into _dst. Each argument is expanded twice. */
#define TIME_WINDOW_COPY(_dst, _src) \
  do {                               \
    (_dst).skey = (_src).skey;       \
    (_dst).ekey = (_src).ekey;       \
  } while (0)
S
TD-1057  
Shengliang Guan 已提交
58

59
/* Bit flags describing the execution status of a query. */
enum {
  /* set when the query starts to execute */
  QUERY_NOT_COMPLETED = 0x1u,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only occurs for group-by queries and for
   * diff/add/division/multiply style queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /*
   * The query is over:
   * 1. used by queries that produce a single result row, e.g. count/sum/first/last/avg;
   * 2. also set once all data within the queried time window has been handled.
   */
  QUERY_COMPLETED = 0x4u,

  /*
   * Used when the result has not yet been completely returned to the client;
   * usually seen with interval queries that use the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
79 80

/* Outcome codes for comparing the two sides of a timestamp join. */
enum {
  TS_JOIN_TS_EQUAL       = 0,  /* timestamps are equal */
  TS_JOIN_TS_NOT_EQUALS  = 1,  /* timestamps differ */
  TS_JOIN_TAG_NOT_EQUALS = 2,  /* tags differ */
};

86
typedef struct {
87 88 89 90 91
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
92 93
} SQueryStatusInfo;

Y
yihaoDeng 已提交
94 95 96 97 98 99 100
/* Table block distribution information. NOTE(review): field meanings below are inferred from names only; confirm against users. */
typedef struct {
  SArray  *dataBlockInfos;  // presumably one entry per data block
  int64_t firstSeekTimeUs;  // presumably the first seek time in microseconds ("Us" suffix)
  int64_t numOfRowsInMemTable;  // presumably the row count held in the mem-table
  char    *result;  // presumably the rendered result buffer; ownership not visible here
} STableBlockDist;

H
Haojun Liao 已提交
101
#if 0
/*
 * Allocator fault injection, compiled out by default. When enabled, every
 * malloc/calloc/realloc that follows in this file is redirected to a wrapper
 * that fails at random so out-of-memory paths can be exercised.
 */

/* malloc() that returns NULL whenever rand() is a multiple of 1000 */
static UNUSED_FUNC void *u_malloc (size_t __size) {
  uint32_t r = rand();
  return (r % 1000 == 0) ? NULL : malloc(__size);
}

/* calloc() that returns NULL whenever rand() is a multiple of 1000 */
static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t r = rand();
  return (r % 1000 == 0) ? NULL : calloc(num, __size);
}

/* realloc() that returns NULL when rand() % 5 is 0 or 1 */
static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
  uint32_t r = rand();
  return (r % 5 <= 1) ? NULL : realloc(p, __size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif
H
Haojun Liao 已提交
134

135
// clear the bits given by st in q->status
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
136 137
// number of entries in q->tableqinfoGroupInfo.pGroupList
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
// the _index-th entry of q->tableqinfoGroupInfo.pGroupList, viewed as an SArray*
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))
138
// a query counts as an interval query when its interval length is positive
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)
H
Haojun Liao 已提交
139

140
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
141
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
142

143 144 145
/* Maximum idle duration: twice the value of tsShellActivityTimer. */
static int32_t getMaximumIdleDurationSec() {
  const int32_t idleDuration = tsShellActivityTimer * 2;
  return idleDuration;
}
146

147 148
/*
 * Advance the time window *tw to the next window in the scan direction of pQuery.
 *
 * For every interval unit except 'n' and 'y' the start key moves by
 * interval.sliding (negated for a descending scan) and the end key is
 * skey + interval.interval - 1.
 *
 * For 'n' and 'y' the step is a number of calendar months (a year counts as
 * 12 months), evaluated in the local time zone via localtime_r/mktime. The
 * window keys are handled in milliseconds, or microseconds when
 * pQuery->precision is TSDB_TIME_PRECISION_MICRO.
 *
 * @param pQuery  query description (order, interval, precision are read)
 * @param tw      in: current window; out: next window
 */
static void getNextTimeWindow(SQuery* pQuery, STimeWindow* tw) {
  int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (pQuery->interval.intervalUnit != 'n' && pQuery->interval.intervalUnit != 'y') {
    tw->skey += pQuery->interval.sliding * factor;
    tw->ekey = tw->skey + pQuery->interval.interval - 1;
    return;
  }

  // reduce the start key to whole seconds for the calendar conversion
  int64_t key = tw->skey / 1000, interval = pQuery->interval.interval;
  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    key /= 1000;
  }
  if (pQuery->interval.intervalUnit == 'y') {
    interval *= 12;
  }

  struct tm tm;
  time_t t = (time_t)key;
  localtime_r(&t, &tm);

  // NOTE(review): tm.tm_isdst is carried over from the original instant; in zones with
  // daylight saving this may shift a boundary by the DST offset. Left as is so results
  // stay consistent with other month arithmetic in the project; confirm before changing.
  int mon = (int)(tm.tm_year * 12 + tm.tm_mon + interval * factor);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  // widen before scaling: time_t * long overflows where both are 32-bit
  tw->skey = (int64_t)mktime(&tm) * 1000;

  mon = (int)(mon + interval);
  tm.tm_year = mon / 12;
  tm.tm_mon = mon % 12;
  tw->ekey = (int64_t)mktime(&tm) * 1000;

  if (pQuery->precision == TSDB_TIME_PRECISION_MICRO) {
    tw->skey *= 1000L;
    tw->ekey *= 1000L;
  }
  // the end key is inclusive: one tick before the start of the following window
  tw->ekey -= 1;
}

184 185
// mark the scan as finished by moving tableIndex to the number of tables
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
// true once tableIndex has reached (or passed) the number of tables
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
186

H
hjxilinx 已提交
187
// todo move to utility
188
static int32_t mergeIntoGroupResultImpl(SGroupResInfo* pGroupResInfo, SArray *pTableList, SQInfo* pQInfo);
189

190
static void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
H
Haojun Liao 已提交
191
static void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult);
192
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
193

194
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
195
                          SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId);
196

197
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
198
static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
199
static void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
200
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
201
static void buildTagQueryResult(SQInfo *pQInfo);
202

203
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
H
Haojun Liao 已提交
204 205
static int32_t checkForQueryBuf(size_t numOfTables);
static void releaseQueryBuf(size_t numOfTables);
206
static int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order);
H
Haojun Liao 已提交
207
static void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey, int32_t type);
H
Haojun Liao 已提交
208
static STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win);
209
static STableIdInfo createTableIdInfo(SQuery* pQuery);
210

211
/*
 * Evaluate all column filters of pQuery against the element at elemPos.
 *
 * A column passes when at least one of its filters accepts the value; the row
 * passes only when every filtered column passes.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  element index into each filter column's pData
 * @return true when the element satisfies the filters of every column
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pVal = (char*)pColFilter->pData + pColFilter->info.bytes * elemPos;

    bool passed = false;
    for (int32_t f = 0; f < pColFilter->numOfFilters && !passed; ++f) {
      SColumnFilterElem *pCond = &pColFilter->pFilters[f];

      if (isNull(pVal, pColFilter->info.type)) {
        // a NULL value can only be accepted by the "is null" operator
        passed = (pCond->fp == isNullOperator);
      } else if (pCond->fp == notNullOperator) {
        passed = true;
      } else if (pCond->fp != isNullOperator) {
        // ordinary comparison; "is null" never accepts a non-NULL value
        if (pCond->fp(pCond, pVal, pVal, pColFilter->info.type)) {
          passed = true;
        }
      }
    }

    if (!passed) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes among the output columns of the query.
 *
 * When the query has a main output (hasMainOutput), the ts, tag and tagprj
 * functions are ignored because they cannot decide the number of results.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    skipAuxiliary = hasMainOutput(pQuery);

  int64_t numOfRes = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
    if (skipAuxiliary && auxiliary) {
      continue;
    }

    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pCell == NULL) {
      continue;
    }

    if (numOfRes < pCell->numOfRes) {
      numOfRes = pCell->numOfRes;
    }
  }

  assert(numOfRes >= 0);
  return numOfRes;
}

279 280 281 282 283
/*
 * The number of results has to be updated after the offset value was updated:
 * overwrite numOfRes of every output column with the given value, skipping the
 * ts, tag, tagprj and ts_dummy functions. The new value must be smaller than
 * the current one (asserted).
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int16_t fid = pRuntimeEnv->pCtx[i].functionId;

    bool skipped = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ ||
                    fid == TSDB_FUNC_TS_DUMMY);
    if (skipped) {
      continue;
    }

    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    assert(pCell->numOfRes > numOfRes);
    pCell->numOfRes = numOfRes;
  }
}

299
/* Map a group index to a merge-result group id: 50000000 + groupIndex * 10000. */
static UNUSED_FUNC int32_t getMergeResultGroupId(int32_t groupIndex) {
  const int32_t firstId = 50000000;
  const int32_t stride  = 10000;
  return firstId + (groupIndex * stride);
}

/*
 * Tell whether the group-by expression contains a normal (non-tag) column.
 * Returns false for a NULL expression or one without group columns.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (!TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      continue;
    }

    // with more than one group column (tbname is also in the group by clause),
    // the normal column must sit at the second position
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pCol->colIndex > 0);
    }

    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression,
 * looked up by column id in pQuery->colList. TSDB_DATA_TYPE_NULL is returned
 * when there is no such column or its id is not in the column list.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 serves as the "no normal group-by column found" id
  int32_t groupColId = -2;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
      groupColId = pCol->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (groupColId == pQuery->colList[c].colId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * True when the output contains at least one selectivity function together
 * with at least one tag_dummy/ts_dummy column.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCount = 0;
  bool    tagSeen = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pExpr1[k].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      tagSeen = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0 && tagSeen);
}

371 372
/* True when every output column is a plain projection (prj) or tag projection (tagprj). */
bool isProjQuery(SQuery *pQuery) {
  int32_t k = 0;
  while (k < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[k].base.functionId;
    bool projection = (fid == TSDB_FUNC_PRJ || fid == TSDB_FUNC_TAGPRJ);
    if (!projection) {
      return false;
    }
    ++k;
  }

  return true;
}

H
Haojun Liao 已提交
382
/* True when the first output expression is the ts_comp function. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFunctionId = pQuery->pExpr1[0].base.functionId;
  return firstFunctionId == TSDB_FUNC_TS_COMP;
}
383

384 385 386
/*
 * Enforce the LIMIT clause on the current batch of results.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is
 * cut down to the remaining quota, the query is marked QUERY_COMPLETED and
 * true is returned. Otherwise nothing changes and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit specified
  if (pQuery->limit.limit <= 0) {
    return false;
  }

  // still within the limit
  if (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any output column (other than ts) is a top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pExpr1[k].base.functionId;

    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

416 417 418 419 420 421 422 423 424 425 426
/* Time-window interpolation is required as soon as one output column is the twa function. */
static bool timeWindowInterpoRequired(SQuery *pQuery) {
  int32_t k = 0;
  while (k < pQuery->numOfOutput) {
    if (pQuery->pExpr1[k].base.functionId == TSDB_FUNC_TWA) {
      return true;
    }
    ++k;
  }

  return false;
}

H
Haojun Liao 已提交
427
/*
 * True when the query output needs tag values: either the query is a single
 * ts_comp column (which requires the tag value for the join filter), or any
 * output column refers to a tag.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirst = &pQuery->pExpr1[0];
  if (pQuery->numOfOutput == 1 && pFirst->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // look for a tag column, by which the results are aggregated
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SExprInfo *pExpr = &pQuery->pExpr1[k];
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

445 446 447 448 449 450 451 452
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
453
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
454
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
455 456
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
457 458
  } else {
    *pColStatis = NULL;
459
  }
460

H
Haojun Liao 已提交
461
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
462 463 464
    return false;
  }

465 466 467
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
468

469 470 471
  return true;
}

H
Haojun Liao 已提交
472
/*
 * Look up, or create, the result row identified by (pData, bytes, uid) and make
 * it the current row of pResultRowInfo.
 *
 * - Interval query, not the master scan: the row stored in the hash table is
 *   returned, or NULL when there is none; nothing is created.
 * - Interval query, master scan: a row already referenced from
 *   pResultRowInfo->pResult becomes the current row; a row found only in the
 *   hash table is appended to pResultRowInfo.
 * - Other queries (group by column): an existing row is returned right away.
 * - Otherwise a new row is taken from the pool, registered in the hash table
 *   and appended to pResultRowInfo.
 *
 * Errors are raised with longjmp(pRuntimeEnv->env, code):
 * TSDB_CODE_QRY_OUT_OF_MEMORY when the slot array cannot grow or the new row
 * cannot be initialized, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW when the number of
 * rows exceeds MAX_INTERVAL_TIME_WINDOW.
 */
static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, char *pData,
                                             int16_t bytes, bool masterscan, uint64_t uid) {
  bool existed = false;
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);

  SResultRow **p1 =
      (SResultRow **)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));

  // in case of repeat scan/reverse scan, no new time window added.
  if (QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    if (!masterscan) {  // the *p1 may be NULL in case of sliding+offset exists.
      return (p1 != NULL)? *p1:NULL;
    }

    if (p1 != NULL) {
      // search backwards for the slot that already references this row
      for(int32_t i = pResultRowInfo->size - 1; i >= 0; --i) {
        if (pResultRowInfo->pResult[i] == (*p1)) {
          pResultRowInfo->curIndex = i;
          existed = true;
          break;
        }
      }
    }
  } else {
    if (p1 != NULL) {  // group by column query
      return *p1;
    }
  }

  if (!existed) {
    // TODO refactor
    // more than the capacity, reallocate the resources
    if (pResultRowInfo->size >= pResultRowInfo->capacity) {
      int64_t newCapacity = 0;
      if (pResultRowInfo->capacity > 10000) {
        newCapacity = (int64_t)(pResultRowInfo->capacity * 1.25);
      } else {
        newCapacity = (int64_t)(pResultRowInfo->capacity * 1.5);
      }

      // the multiplicative step does not grow a capacity of 0 or 1; always add at least
      // one slot so that the write to pResult[size] below stays inside the array
      if (newCapacity <= pResultRowInfo->capacity) {
        newCapacity = (int64_t)pResultRowInfo->capacity + 1;
      }

      // keep the old pointer until realloc has succeeded
      char *t = realloc(pResultRowInfo->pResult, (size_t)(newCapacity * POINTER_BYTES));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pResultRowInfo->pResult = (SResultRow **)t;

      // clear the newly added slots
      int32_t inc = (int32_t)newCapacity - pResultRowInfo->capacity;
      memset(&pResultRowInfo->pResult[pResultRowInfo->capacity], 0, POINTER_BYTES * inc);

      pResultRowInfo->capacity = (int32_t)newCapacity;
    }

    SResultRow *pResult = NULL;

    if (p1 == NULL) {
      pResult = getNewResultRow(pRuntimeEnv->pool);
      int32_t ret = initResultRow(pResult);
      if (ret != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // add a new result set for a new group
      taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pResult, POINTER_BYTES);
    } else {
      pResult = *p1;
    }

    pResultRowInfo->pResult[pResultRowInfo->size] = pResult;
    pResultRowInfo->curIndex = pResultRowInfo->size++;
  }

  // too many time window in query
  if (pResultRowInfo->size > MAX_INTERVAL_TIME_WINDOW) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

  return getResultRow(pResultRowInfo, pResultRowInfo->curIndex);
}

// get the correct time window according to the handled timestamp
H
Haojun Liao 已提交
553
static STimeWindow getActiveTimeWindow(SResultRowInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
554
  STimeWindow w = {0};
555

556
 if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
557
    w.skey = pWindowResInfo->prevSKey;
558
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
559
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
560
    } else {
561
      w.ekey = w.skey + pQuery->interval.interval - 1;
562
    }
563
  } else {
564
    int32_t slot = curTimeWindowIndex(pWindowResInfo);
565
    SResultRow* pWindowRes = getResultRow(pWindowResInfo, slot);
566
    w = pWindowRes->win;
567
  }
568

569
  if (w.skey > ts || w.ekey < ts) {
570 571 572
    if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
      w.skey = taosTimeTruncate(ts, &pQuery->interval, pQuery->precision);
      w.ekey = taosTimeAdd(w.skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
573 574
    } else {
      int64_t st = w.skey;
575

576
      if (st > ts) {
577
        st -= ((st - ts + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
578
      }
579

580
      int64_t et = st + pQuery->interval.interval - 1;
581
      if (et < ts) {
582
        st += ((ts - et + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
583
      }
584

585
      w.skey = st;
586
      w.ekey = w.skey + pQuery->interval.interval - 1;
587
    }
588
  }
589

590 591 592 593 594 595 596
  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }
597

598 599 600
  return w;
}

H
Haojun Liao 已提交
601
/*
 * Assign a slot in the disk-based result buffer to a result row that has none.
 *
 * The last page of group tid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when the group has no page yet) a new
 * page is requested.
 *
 * @return 0 on success or when the row already has a page, -1 when no page
 *         could be obtained
 */
static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid,
                                     int32_t numOfRowsPerPage) {
  // already bound to a page, nothing to do
  if (pWindowRes->pageId != -1) {
    return 0;
  }

  tFilePage *pPage = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList pageList = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(pageList) != 0) {
    SPageInfo* pLast = getLastPageInfo(pageList);
    pPage = getResBufPage(pResultBuf, pLast->pageId);
    pageId = pLast->pageId;

    if (pPage->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pLast);
      pPage = getNewDataBuf(pResultBuf, tid, &pageId);

      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, tid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->rowId = (int32_t)(pPage->num++);

    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

645 646
/*
 * Bind the output buffers of the runtime environment to the result row of the
 * time window win within group groupId.
 *
 * *pResult receives the row, or NULL when no row is available for the window
 * (still reported as TSDB_CODE_SUCCESS). -1 is returned when the row cannot be
 * given a result-buffer page; *pResult is not written in that case.
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, STimeWindow *win,
    bool masterscan, SResultRow** pResult, int64_t groupId) {
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  // the window start key is the lookup key of the row
  SResultRow *pRow = doPrepareResultRowFromKey(pRuntimeEnv, pResultRowInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, groupId);
  if (pRow == NULL) {
    *pResult = NULL;
    return TSDB_CODE_SUCCESS;
  }

  // not assign result buffer yet, add new result buffer
  if (pRow->pageId == -1) {
    int32_t code = addNewWindowResultBuf(pRow, pBuf, (int32_t) groupId, pRuntimeEnv->numOfRowsPerPage);
    if (code != TSDB_CODE_SUCCESS) {
      return -1;
    }
  }

  // set time window for current result
  pRow->win = (*win);
  *pResult = pRow;
  setResultRowOutputBufInitCtx(pRuntimeEnv, pRow);

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
672
/* Return the closed flag of the result row stored at the given slot (slot must be in [0, size)). */
static bool getResultRowStatus(SResultRowInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SResultRow *pRow = pWindowResInfo->pResult[slot];
  return pRow->closed;
}

677 678 679 680 681 682 683 684 685 686 687 688 689 690
/* Selects which boundary of a result row an interpolation flag refers to. */
typedef enum SResultTsInterpType {
  /* the start boundary (SResultRow.startInterp) */
  RESULT_ROW_START_INTERP = 1,
  /* the end boundary (SResultRow.endInterp) */
  RESULT_ROW_END_INTERP   = 2,
} SResultTsInterpType;

/* Mark the start or the end boundary of the result row as interpolated. */
static void setResultRowInterpo(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  switch (type) {
    case RESULT_ROW_START_INTERP:
      pResult->startInterp = true;
      break;
    default:  // RESULT_ROW_END_INTERP
      pResult->endInterp   = true;
      break;
  }
}

H
Haojun Liao 已提交
691
/* Tell whether the start or the end boundary of the result row has been interpolated. */
static bool resultRowInterpolated(SResultRow* pResult, SResultTsInterpType type) {
  assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP));

  return (type == RESULT_ROW_START_INTERP) ? (pResult->startInterp == true)
                                           : (pResult->endInterp   == true);
}

H
Haojun Liao 已提交
700
/*
 * Count the rows to consume, starting at pos and moving in the scan direction,
 * up to the key ekey.
 *
 * Ascending: searchFn runs over pData[pos .. numOfRows-1]. Descending: it runs
 * over pData[0 .. pos]. One extra step is taken when the located row's
 * timestamp equals ekey. The result is asserted to be positive.
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order == TSDB_ORDER_ASC) {
    // the returned index is relative to pos
    int32_t idx = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (idx >= 0) {
      steps = idx;
      if (pData[idx + pos] == ekey) {
        steps += 1;
      }
    }
  } else {
    // the returned index is relative to the beginning of the block
    int32_t idx = searchFn((char *)pData, pos + 1, ekey, order);
    if (idx >= 0) {
      steps = pos - idx;
      if (pData[idx] == ekey) {
        steps += 1;
      }
    }
  }

  assert(steps > 0);
  return steps;
}

728
/*
 * Close the result rows that are complete with respect to lastKey and move
 * curIndex/prevSKey to the first row that is still open.
 *
 * Rows are visited from the newest one backwards until an already closed row
 * is met. With timeWindowInterpo a row is closed only when its end has been
 * interpolated and its skey is on the finished side of lastKey; without it,
 * the ekey (ascending) or skey (descending) is compared against lastKey.
 */
static void doUpdateResultRowIndex(SResultRowInfo*pResultRowInfo, TSKEY lastKey, bool ascQuery, bool timeWindowInterpo) {
  int64_t openSKey = TSKEY_INITIAL_VAL;
  int32_t i = 0;

  for (i = pResultRowInfo->size - 1; i >= 0; --i) {
    SResultRow *pRow = pResultRowInfo->pResult[i];
    if (pRow->closed) {
      break;
    }

    // new closed result rows
    bool closable = false;
    if (timeWindowInterpo) {
      closable = pRow->endInterp && ((pRow->win.skey <= lastKey && ascQuery) || (pRow->win.skey >= lastKey && !ascQuery));
      if (closable && i > 0) { // the first time window, the startInterp is false.
        assert(pRow->startInterp);
      }
    } else {
      closable = (pRow->win.ekey <= lastKey && ascQuery) || (pRow->win.skey >= lastKey && !ascQuery);
    }

    if (closable) {
      closeResultRow(pResultRowInfo, i);
    } else {
      openSKey = pRow->win.skey;
    }
  }

  // all result rows are closed, set the last one to be the skey
  if (openSKey == TSKEY_INITIAL_VAL) {
    pResultRowInfo->curIndex = pResultRowInfo->size - 1;
    return;
  }

  // locate the newest closed row; the row after it is the current open one
  for (i = pResultRowInfo->size - 1; i >= 0; --i) {
    if (pResultRowInfo->pResult[i]->closed) {
      break;
    }
  }

  pResultRowInfo->curIndex = (i == pResultRowInfo->size - 1) ? i : i + 1;
  pResultRowInfo->prevSKey = pResultRowInfo->pResult[pResultRowInfo->curIndex]->win.skey;
}
778

779
/*
 * Update the closed state and current index of the result rows of one table.
 *
 * Once the table's lastKey has moved past the end of its query window (in scan
 * direction), every row is closed and the last row becomes current. Otherwise
 * rows are closed individually against the last handled key (lastKey - step).
 */
static void updateResultRowIndex(SResultRowInfo* pResultRowInfo, STableQueryInfo* pTableQueryInfo, bool ascQuery, bool timeWindowInterpo) {
  bool passedEnd = (pTableQueryInfo->lastKey > pTableQueryInfo->win.ekey && ascQuery) ||
                   (pTableQueryInfo->lastKey < pTableQueryInfo->win.ekey && (!ascQuery));

  if (!passedEnd) {
    int32_t step = ascQuery? 1:-1;
    doUpdateResultRowIndex(pResultRowInfo, pTableQueryInfo->lastKey - step, ascQuery, timeWindowInterpo);
    return;
  }

  closeAllResultRows(pResultRowInfo);
  pResultRowInfo->curIndex = pResultRowInfo->size - 1;
}

/*
 * Count the rows of the data block, starting at startPos and moving in the
 * scan direction, that belong to the time window ending at ekey.
 *
 * When the window ends inside the block the count comes from
 * getForwardStepsInBlock; otherwise all remaining rows in scan direction are
 * taken. With updateLastKey set, pQuery->current->lastKey is moved one step
 * past the last counted timestamp (or past the block boundary).
 *
 * @return number of rows, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num   = -1;
  int32_t order = pQuery->order.order;
  int32_t step  = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* pTable = pQuery->current;

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // does the window end before the last row of the block, seen in scan direction?
  const bool endsInsideBlock = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  if (endsInsideBlock) {
    num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    if (updateLastKey) {  // update the last key
      int32_t lastPos = asc ? startPos + (num - 1) : startPos - (num - 1);
      pTable->lastKey = pPrimaryColumn[lastPos] + step;
    }
  } else {
    if (asc) {
      num = pDataBlockInfo->rows - startPos;
    } else {
      num = startPos + 1;
    }

    if (updateLastKey) {
      TSKEY boundary = asc ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
      pTable->lastKey = boundary + step;
    }
  }

  assert(num > 0);
  return num;
}

829 830
/**
 * Invoke the block-wise implementation (xFunction) of every output function on a range of
 * rows of the current data block.
 *
 * @param pRuntimeEnv  runtime environment holding the query and the function contexts
 * @param pWin         time window the rows belong to; its skey becomes nStartQueryTimestamp
 * @param offset       row index where the range starts in scan direction
 * @param forwardStep  number of rows in the range
 * @param tsCol        timestamp column (not used by this function)
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pWin, int32_t offset, int32_t forwardStep, TSKEY *tsCol, int32_t numOfTotal) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    // remember the flag of THIS context, so that the restore below cannot overwrite it with
    // the state of another context
    bool hasPrev = pCtx[k].preAggVals.isSet;

    pCtx[k].nStartQueryTimestamp = pWin->skey;
    pCtx[k].size = forwardStep;
    // for descending scans the range ends at offset, so it starts forwardStep - 1 rows earlier
    pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

    int32_t functionId = pQuery->pExpr1[k].base.functionId;

    // not a whole block involved in query processing, statistics data can not be used
    // NOTE: the original value of isSet is changed here and restored after the call
    if (pCtx[k].preAggVals.isSet && forwardStep < numOfTotal) {
      pCtx[k].preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
      aAggs[functionId].xFunction(&pCtx[k]);
    }

    // restore it
    pCtx[k].preAggVals.isSet = hasPrev;
  }
}

857 858
/*
 * Invoke the row-wise implementation (xFunctionF) of every output function for the single
 * row at index `offset`; pWin->skey is stored as nStartQueryTimestamp of each context.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pWin, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SQLFunctionCtx *pCur = &pCtxList[idx];
    pCur->nStartQueryTimestamp = pWin->skey;

    int32_t funcId = pQuery->pExpr1[idx].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pCur, funcId)) {
      aAggs[funcId].xFunctionF(pCur, offset);
    }

    ++idx;
  }
}

H
Haojun Liao 已提交
871 872
/*
 * Move pNext to the following time window (getNextTimeWindow) and find the row of the
 * current block where that window starts.
 *
 * Returns -1 when the new window lies completely beyond the block in scan direction,
 * otherwise the start row index. If the row found at that index is already past the new
 * window, pNext is moved further so that it covers that row.
 *
 * prevPosition is the last row of the previous window, or -1 if unknown; it is only used
 * when sliding == interval (tumbling windows).
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  getNextTimeWindow(pQuery, pNext);

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  if (asc) {
    if (pNext->skey > pDataBlockInfo->window.ekey) {
      return -1;
    }
  } else if (pNext->ekey < pDataBlockInfo->window.skey) {
    return -1;
  }

  // first key of the window in scan direction, clipped to the start key of the query range
  TSKEY startKey = asc ? pNext->skey : pNext->ekey;
  if ((asc && startKey < pQuery->window.skey) || (!asc && startKey > pQuery->window.skey)) {
    startKey = pQuery->window.skey;
  }

  int32_t startPos = 0;

  // tumbling time window query, a special case of sliding time window query
  const bool tumbling = (pQuery->interval.sliding == pQuery->interval.interval);
  if (tumbling && prevPosition != -1) {
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else if (asc && startKey <= pDataBlockInfo->window.skey) {
    startPos = 0;
  } else if (!asc && startKey >= pDataBlockInfo->window.ekey) {
    startPos = pDataBlockInfo->rows - 1;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  // without timestamps only the overlap of window and block can be verified
  if (primaryKeys == NULL) {
    if (asc) {
      assert(pDataBlockInfo->window.skey <= pNext->ekey);
    } else {
      assert(pDataBlockInfo->window.ekey >= pNext->skey);
    }

    return startPos;
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  const TSKEY rowTs = primaryKeys[startPos];
  const bool  calendarUnit = (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y');

  const bool rowAfterWindow  = asc && (rowTs > pNext->ekey);
  const bool rowBeforeWindow = (!asc) && (rowTs < pNext->skey);

  if (rowAfterWindow || rowBeforeWindow) {
    if (calendarUnit) {
      // month/year windows: re-align the window on the timestamp of the row
      pNext->skey = taosTimeTruncate(rowTs, &pQuery->interval, pQuery->precision);
      pNext->ekey = taosTimeAdd(pNext->skey, pQuery->interval.interval, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    } else if (rowAfterWindow) {
      // advance by the smallest multiple of the sliding step that reaches the row
      pNext->ekey += ((rowTs - pNext->ekey + pQuery->interval.sliding - 1)/pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->skey = pNext->ekey - pQuery->interval.interval + 1;
    } else {
      // descending scan: move the window backwards instead
      pNext->skey -= ((pNext->skey - rowTs + pQuery->interval.sliding - 1) / pQuery->interval.sliding) * pQuery->interval.sliding;
      pNext->ekey = pNext->skey + pQuery->interval.interval - 1;
    }
  }

  return startPos;
}

H
Haojun Liao 已提交
947
/*
 * Return the last key of pWindow in scan direction, clipped so that it does not run past
 * pQuery->window.ekey: the smaller of the two for ascending queries (using pWindow->ekey),
 * the larger of the two for descending queries (using pWindow->skey).
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY limit = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > limit) ? limit : pWindow->ekey;
  }

  return (pWindow->skey < limit) ? limit : pWindow->skey;
}

H
hjxilinx 已提交
964 965
/*
 * Return the data buffer of the column with id `colId` in pDataBlock, or NULL if the block
 * holds no such column. The columns are scanned linearly (todo: binary search).
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

978
/*
 * Find the input buffer for output expression `col`.
 *
 * For an arithmetic expression, `sas` is filled with the expression, the row offset, the
 * column list and the data buffer of every query column; the returned pointer is then the
 * buffer of the last query column. For any other function the buffer of the referenced
 * column is returned, or NULL if that column is not a normal column.
 * NULL is also returned when pDataBlock is NULL.
 */
static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size, SArray *pDataBlock) {
  if (pDataBlock == NULL) {
    return NULL;
  }

  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->pExpr1[col].base.functionId != TSDB_FUNC_ARITHM) {  // other type of query function
    SColIndex *pColIndex = &pQuery->pExpr1[col].base.colInfo;
    if (!TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      return NULL;
    }

    SColumnInfoData *pColData = taosArrayGet(pDataBlock, pColIndex->colIndex);
    assert(pColData->info.colId == pColIndex->colId);

    return pColData->pData;
  }

  sas->pArithExpr = &pQuery->pExpr1[col];
  // for descending scans the rows end at pQuery->pos, so they start size - 1 rows earlier
  sas->offset     = (QUERY_IS_ASC_QUERY(pQuery))? pQuery->pos : pQuery->pos - (size - 1);
  sas->colList    = pQuery->colList;
  sas->numOfCols  = pQuery->numOfCols;

  // here the pQuery->colList and sas->colList are identical
  char   *colData = NULL;
  int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    SColumnInfo *pColMsg = &pQuery->colList[i];

    // look up the buffer of this query column by its column id
    colData = NULL;
    int32_t j = 0;
    while (j < numOfBlockCols && colData == NULL) {
      SColumnInfoData *pCandidate = taosArrayGet(pDataBlock, j);
      if (pCandidate->info.colId == pColMsg->colId) {
        colData = pCandidate->pData;
        break;
      }

      ++j;
    }

    assert(colData != NULL);
    sas->data[i] = colData;  // start from the offset
  }

  return colData;
}

H
Haojun Liao 已提交
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038
/*
 * Reset one interpolation key of every function context to INT64_MIN: start.key when
 * type is RESULT_ROW_START_INTERP, end.key for any other type.
 */
static void setNotInterpoWindowKey(SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t type) {
  const bool resetStart = (type == RESULT_ROW_START_INTERP);

  for (int32_t i = 0; i < numOfOutput; ++i) {
    if (resetStart) {
      pCtx[i].start.key = INT64_MIN;
    } else {
      pCtx[i].end.key = INT64_MIN;
    }
  }
}

1039
// window start key interpolation
H
Haojun Liao 已提交
1040
static bool setTimeWindowInterpolationStartTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t pos, int32_t numOfRows, SArray* pDataBlock, TSKEY* tsCols, STimeWindow* win) {
1041 1042
  SQuery* pQuery = pRuntimeEnv->pQuery;

H
Haojun Liao 已提交
1043
  TSKEY curTs  = tsCols[pos];
1044 1045
  TSKEY lastTs = *(TSKEY *) pRuntimeEnv->prevRow[0];

H
Haojun Liao 已提交
1046 1047 1048 1049
  // lastTs == INT64_MIN and pos == 0 means this is the first time window, interpolation is not needed.
  // start exactly from this point, no need to do interpolation
  TSKEY key = QUERY_IS_ASC_QUERY(pQuery)? win->skey:win->ekey;
  if (key == curTs) {
H
Haojun Liao 已提交
1050
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
1051
    return true;
H
Haojun Liao 已提交
1052
  }
1053

H
Haojun Liao 已提交
1054
  if (lastTs == INT64_MIN && ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))) {
H
Haojun Liao 已提交
1055
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
H
Haojun Liao 已提交
1056
    return true;
1057 1058
  }

H
Haojun Liao 已提交
1059 1060 1061
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  TSKEY   prevTs = ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))?
      lastTs:tsCols[pos - step];
1062

H
Haojun Liao 已提交
1063 1064 1065
  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, pos - step, curTs, pos, key, RESULT_ROW_START_INTERP);
  return true;
}
1066

H
Haojun Liao 已提交
1067 1068 1069
/*
 * Window end key interpolation.
 *
 * endRowIndex is the last row of window `win` in the current block and blockEkey the last
 * key of the block in scan direction. Returns false (end keys reset to INT64_MIN) when the
 * window continues beyond the block. Returns true otherwise: either the row lies exactly on
 * the window end key (end keys reset, nothing to interpolate) or the value at the end key
 * is interpolated between the row at endRowIndex and the row following it.
 */
static bool setTimeWindowInterpolationEndTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t endRowIndex, SArray* pDataBlock, TSKEY* tsCols, TSKEY blockEkey, STimeWindow* win) {
  SQuery*    pQuery = pRuntimeEnv->pQuery;
  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  TSKEY lastRowTs = tsCols[endRowIndex];
  TSKEY boundary  = asc? win->ekey:win->skey;

  // not ended in current data block, do not invoke interpolation
  const bool beyondBlock = asc? (boundary > blockEkey):(boundary < blockEkey);

  // a row exactly on the boundary is the actual end point of the window, no interpolation needed
  if (beyondBlock || boundary == lastRowTs) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
    return !beyondBlock;
  }

  int32_t followingRow = endRowIndex + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  assert(followingRow >= 0);

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, lastRowTs, endRowIndex, tsCols[followingRow], followingRow,
                                   boundary, RESULT_ROW_END_INTERP);
  return true;
}

1094 1095
/*
 * Copy the row at `rowIndex` of every query column from pDataBlock into
 * pRuntimeEnv->prevRow. Does nothing when pDataBlock is NULL.
 * pDataBlockInfo is not used.
 */
static void saveDataBlockLastRow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray* pDataBlock,
    int32_t rowIndex) {
  if (pDataBlock == NULL) {
    return;
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfCols) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, col);

    // cells have a fixed width of info.bytes, so the row starts at bytes * rowIndex
    char *pCell = ((char*)pCol->pData) + (pCol->info.bytes * rowIndex);
    memcpy(pRuntimeEnv->prevRow[col], pCell, pCol->info.bytes);

    ++col;
  }
}

/*
 * Return the timestamp processing of the block starts with: the entry of tsCols at
 * GET_COL_DATA_POS(pQuery, 0, step) if the timestamp column is available, otherwise the
 * block border in scan direction (window.skey for ascending, window.ekey for descending).
 */
static TSKEY getStartTsKey(SQuery* pQuery, SDataBlockInfo* pDataBlockInfo, TSKEY* tsCols, int32_t step) {
  if (tsCols != NULL) {
    int32_t rowPos = GET_COL_DATA_POS(pQuery, 0, step);
    return tsCols[rowPos];
  }

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return pDataBlockInfo->window.skey;
  }

  return pDataBlockInfo->window.ekey;
}

H
Haojun Liao 已提交
1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158
/*
 * Prepare the start and end border interpolation of one time window for the rows
 * [startPos, startPos + (forwardStep - 1) * step] of the current block, and flag the
 * borders handled in pResult. A border that pResult already flags as interpolated only gets
 * its key reset in the function contexts. No-op unless pRuntimeEnv->timeWindowInterpo is set.
 */
static void doWindowBorderInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray *pDataBlock,
    SResultRow* pResult, STimeWindow* win, int32_t startPos, int32_t forwardStep) {
  if (!pRuntimeEnv->timeWindowInterpo) {
    return;
  }

  assert(pDataBlock != NULL);

  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the first column of the block is used as the timestamp column
  SColumnInfoData *pTsColumn = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = (TSKEY *)(pTsColumn->pData);

  // start border
  if (resultRowInterpolated(pResult, RESULT_ROW_START_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
  } else if (setTimeWindowInterpolationStartTs(pRuntimeEnv, startPos, pDataBlockInfo->rows, pDataBlock, tsCols, win)) {
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  }

  // end border
  if (resultRowInterpolated(pResult, RESULT_ROW_END_INTERP)) {
    setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP);
    return;
  }

  int32_t lastRow = startPos + (forwardStep - 1) * step;
  TSKEY   blockEnd = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;

  if (setTimeWindowInterpolationEndTs(pRuntimeEnv, lastRow, pDataBlock, tsCols, blockEnd, win)) {
    setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
  }
}

1159
/**
H
Haojun Liao 已提交
1160
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
1161 1162
 * @param pRuntimeEnv
 * @param forwardStep
1163
 * @param tsCols
1164 1165 1166 1167 1168
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
H
Haojun Liao 已提交
1169
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
H
Haojun Liao 已提交
1170
                                    SResultRowInfo *pWindowResInfo, __block_search_fn_t searchFn, SArray *pDataBlock) {
1171
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1172
  bool            masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1173

1174 1175 1176
  SQuery *pQuery  = pRuntimeEnv->pQuery;
  int64_t groupId = pQuery->current->groupIndex;

1177
  TSKEY  *tsCols = NULL;
1178
  if (pDataBlock != NULL) {
1179
    SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, 0);
1180
    tsCols = (TSKEY *)(pColInfo->pData);
1181
  }
1182

1183
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1184
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1185 1186
    char *dataBlock = getDataBlock(pRuntimeEnv, &pRuntimeEnv->sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &pRuntimeEnv->sasArray[k], k, pQInfo->vgId);
1187
  }
1188

1189
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
1190
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1191 1192
    int32_t prevIndex = curTimeWindowIndex(pWindowResInfo);

1193
    TSKEY ts = getStartTsKey(pQuery, pDataBlockInfo, tsCols, step);
H
Haojun Liao 已提交
1194
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1195

1196
    SResultRow* pResult = NULL;
1197 1198 1199
    int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
    if (ret != TSDB_CODE_SUCCESS || pResult == NULL) {
      goto _end;
1200
    }
1201

H
Haojun Liao 已提交
1202 1203 1204
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

1205 1206
    TSKEY ekey = reviseWindowEkey(pQuery, &win);
    forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
H
Haojun Liao 已提交
1207

1208 1209 1210
    // prev time window not interpolation yet.
    int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
    if (prevIndex != -1 && prevIndex < curIndex && pRuntimeEnv->timeWindowInterpo) {
1211
      for(int32_t j = prevIndex; j < curIndex; ++j) { // previous time window may be all closed already.
1212
        SResultRow *pRes = pWindowResInfo->pResult[j];
1213 1214 1215 1216
        if (pRes->closed) {
          assert(resultRowInterpolated(pRes, RESULT_ROW_START_INTERP) && resultRowInterpolated(pRes, RESULT_ROW_END_INTERP));
          continue;
        }
H
Haojun Liao 已提交
1217

1218 1219 1220
        STimeWindow w = pRes->win;
        ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &w, masterScan, &pResult, groupId);
        assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
H
Haojun Liao 已提交
1221

1222 1223 1224 1225
        int32_t p = QUERY_IS_ASC_QUERY(pQuery)? 0:pDataBlockInfo->rows-1;
        doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, *(TSKEY*) pRuntimeEnv->prevRow[0], -1,  tsCols[0], p, w.ekey, RESULT_ROW_END_INTERP);
        setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);
        setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
H
Haojun Liao 已提交
1226

1227
        doBlockwiseApplyFunctions(pRuntimeEnv, &w, startPos, 0, tsCols, pDataBlockInfo->rows);
H
Haojun Liao 已提交
1228 1229
      }

1230 1231 1232
      // restore current time window
      ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
      assert (ret == TSDB_CODE_SUCCESS);
1233
    }
1234

1235 1236 1237
    // window start key interpolation
    doWindowBorderInterpolation(pRuntimeEnv, pDataBlockInfo, pDataBlock, pResult, &win, pQuery->pos, forwardStep);
    doBlockwiseApplyFunctions(pRuntimeEnv, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1238

1239
    STimeWindow nextWin = win;
1240
    while (1) {
H
Haojun Liao 已提交
1241 1242
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
1243 1244 1245
      if (startPos < 0) {
        break;
      }
1246

1247
      // null data, failed to allocate more memory buffer
1248 1249
      int32_t code = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &nextWin, masterScan, &pResult, groupId);
      if (code != TSDB_CODE_SUCCESS || pResult == NULL) {
1250 1251
        break;
      }
1252

1253
      ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
1254
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
1255

1256
      // window start(end) key interpolation
H
Haojun Liao 已提交
1257
      doWindowBorderInterpolation(pRuntimeEnv, pDataBlockInfo, pDataBlock, pResult, &nextWin, startPos, forwardStep);
1258
      doBlockwiseApplyFunctions(pRuntimeEnv, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
1259
    }
1260

1261 1262 1263 1264 1265 1266
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1267
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1268
      int32_t functionId = pQuery->pExpr1[k].base.functionId;
1269
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
1270
        pCtx[k].nStartQueryTimestamp = pDataBlockInfo->window.skey;
1271 1272 1273 1274
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1275

1276
  _end:
1277
  if (pRuntimeEnv->timeWindowInterpo) {
1278 1279
    int32_t rowIndex = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->rows-1:0;
    saveDataBlockLastRow(pRuntimeEnv, pDataBlockInfo, pDataBlock, rowIndex);
1280
  }
1281 1282
}

1283
/**
 * Select the result row of the group identified by the group-by value at pData and make
 * it the output buffer of the function contexts.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        group-by column value; for BINARY/NCHAR a variable-length value
 * @param type         data type of the value; FLOAT and DOUBLE are rejected
 * @param bytes        width of the value, used as key length for fixed-width types
 * @param groupIndex   group index forwarded to the result row lookup and page allocation
 * @return             TSDB_CODE_SUCCESS, or -1 if the type is not supported, the copy of
 *                     the key cannot be allocated, or no result buffer page can be added
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) {
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // not assign result buffer yet, add new result buffer, TODO remove it
  char* d = pData;
  int16_t len = bytes;
  if (type == TSDB_DATA_TYPE_BINARY||type == TSDB_DATA_TYPE_NCHAR) {
    // variable-length values: the key is the payload without the length header
    d = varDataVal(pData);
    len = varDataLen(pData);
  } else if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) {
    SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
    qError("QInfo:%p group by not supported on double/float columns, abort", pQInfo);
    return -1;
  }

  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, d, len, true, groupIndex);
  assert (pResultRow != NULL);

  int64_t v = -1;
  GET_TYPED_DATA(v, int64_t, type, pData);
  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    if (pResultRow->key == NULL) {
      pResultRow->key = malloc(varDataTLen(pData));
      if (pResultRow->key == NULL) {  // out of memory, do not copy into a NULL buffer
        return -1;
      }

      varDataCopy(pResultRow->key, pData);
    } else {
      assert(memcmp(pResultRow->key, pData, varDataTLen(pData)) == 0);
    }
  } else {
    // fixed-width group value: stored in both keys of the window
    pResultRow->win.skey = v;
    pResultRow->win.ekey = v;
  }

  // the row has no buffer page yet
  if (pResultRow->pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pResultRow, pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  setResultOutputBuf(pRuntimeEnv, pResultRow);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1327
/*
 * Find the data buffer of the group-by column in pDataBlock.
 *
 * Tag columns of the group-by expression are skipped. For each remaining column, *type and
 * *bytes are set from the matching entry of pQuery->colList, then the block is searched for
 * that column id and its buffer is returned. Returns NULL if no group-by column is found in
 * the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  int32_t k = 0;
  for (; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, k);
    if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      continue;
    }

    // position of the column in the column list of the query
    int16_t colIndex = -1;
    int32_t colId = pColIndex->colId;

    int32_t pos = 0;
    while (pos < pQuery->numOfCols) {
      if (pQuery->colList[pos].colId == colId) {
        colIndex = pos;
        break;
      }

      pos++;
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);

    int32_t c = 0;
    while (c < numOfBlockCols) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, c);
      if (pColData->info.colId == pColIndex->colId) {
        return pColData->pData;
      }

      c++;
    }
  }

  return NULL;
}

/**
 * Compare the row at `offset` of the current input with the current element of the
 * timestamp buffer (pRuntimeEnv->pTsBuf) of a join query.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param offset       row index into the timestamp input of the first function context
 * @return             TS_JOIN_TAG_NOT_EQUALS if the tag of the first context differs from the
 *                     tag of the element, TS_JOIN_TS_NOT_EQUALS if the row has not reached the
 *                     timestamp of the element yet in scan direction, TS_JOIN_TS_EQUAL otherwise.
 *                     A row beyond the element's timestamp is not expected (asserted).
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTsBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  // timestamps are stored with a fixed width of TSDB_KEYSIZE bytes
  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is passed to tVariantCompare as a pointer above, so it is dereferenced with ->
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%" PRId64 ", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag->i64, pQuery->order.order, pRuntimeEnv->pTsBuf->tsOrder,
         pRuntimeEnv->pTsBuf->cur.order, pRuntimeEnv->pTsBuf->cur.tsIndex);
#endif

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function `functionId` bound to pCtx has to be invoked in the current
 * scan:
 *  - TSDB_FUNC_TS: always;
 *  - a completed result, TSDB_FUNC_TAG_DUMMY and TSDB_FUNC_TS_DUMMY: never;
 *  - first/first_dst: only for ascending queries;
 *  - last/last_dst: only if the order stored in param[0] equals the query order;
 *  - any other function: in every scan except the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultRowCellInfo *pCellInfo = GET_RES_INFO(pCtx);
  SQuery             *pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  const bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (pCellInfo->complete || isDummy) {
    return false;
  }

  const bool isFirst = (functionId == TSDB_FUNC_FIRST_DST) || (functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // denote the order type
  const bool isLast = (functionId == TSDB_FUNC_LAST_DST) || (functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64 == pQuery->order.order;
  }

  // in the supplementary scan, only the functions handled above need to be executed
  return IS_REVERSE_SCAN(pRuntimeEnv) ? false : true;
}

H
Haojun Liao 已提交
1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450
/*
 * Compute the linearly interpolated value at `windowKey` for every TWA output function and
 * store it in the context as start point (type == RESULT_ROW_START_INTERP) or end point
 * (any other type). Contexts of other functions only get start.key reset to INT64_MIN.
 *
 * The two supporting points are (prevTs, value of row prevRowIndex) and
 * (curTs, value of row curRowIndex). prevRowIndex == -1 selects the row saved in
 * pRuntimeEnv->prevRow instead of a row of pDataBlock. curTs must differ from windowKey.
 */
void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey,  int32_t type) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx* pCtxList = pRuntimeEnv->pCtx;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx* pCur = &pCtxList[i];

    if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_TWA) {
      pCur->start.key = INT64_MIN;
      continue;
    }

    SColIndex*       pColIndex = &pQuery->pExpr1[i].base.colInfo;
    int16_t          colPos = pColIndex->colIndex;
    SColumnInfoData* pColData = taosArrayGet(pDataBlock, colPos);

    assert(pColData->info.colId == pColIndex->colId && curTs != windowKey);

    double prevVal = 0, curVal = 0, interpVal = 0;

    // value of the previous row: from the saved row, or from the block itself
    char* pPrevCell = (prevRowIndex == -1)
                          ? (char *)pRuntimeEnv->prevRow[colPos]
                          : (char *)pColData->pData + prevRowIndex * pColData->info.bytes;
    GET_TYPED_DATA(prevVal, double, pColData->info.type, pPrevCell);

    char* pCurCell = (char *)pColData->pData + curRowIndex * pColData->info.bytes;
    GET_TYPED_DATA(curVal, double, pColData->info.type, pCurCell);

    SPoint from   = {.key = prevTs, .val = &prevVal};
    SPoint to     = {.key = curTs, .val = &curVal};
    SPoint target = {.key = windowKey, .val = &interpVal};
    taosGetLinearInterpolationVal(TSDB_DATA_TYPE_DOUBLE, &from, &to, &target);

    if (type == RESULT_ROW_START_INTERP) {
      pCur->start.key = target.key;
      pCur->start.val = interpVal;
    } else {
      pCur->end.key = target.key;
      pCur->end.val = interpVal;
    }
  }
}

H
Haojun Liao 已提交
1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509
/*
 * Row-wise counterpart of the window start interpolation for the row (ts, offset).
 *
 * If the start border of pResult is already flagged as interpolated, only the start keys
 * of the contexts are reset. Otherwise the border is flagged when the row lies exactly on
 * the start key, or after interpolating between the previous row and this row when a
 * previous row exists (prevTs != INT64_MIN) and lies before the start key in scan
 * direction; in any other case the start keys are reset. The end keys are then reset and
 * the size of every context is set to 1.
 */
static void setTimeWindowSKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx* pCtxList = pRuntimeEnv->pCtx;

  if (resultRowInterpolated(pResult, RESULT_ROW_START_INTERP)) {
    setNotInterpoWindowKey(pCtxList, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    return;
  }

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);
  TSKEY      startKey = asc? win->skey:win->ekey;

  if (startKey == ts) {
    setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
  } else {
    // a usable previous row lies strictly before the start key in scan direction
    bool prevBeforeKey = (asc && prevTs < startKey) || (!asc && prevTs > startKey);

    if (prevTs != INT64_MIN && prevBeforeKey) {
      doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, startKey, RESULT_ROW_START_INTERP);
      setResultRowInterpo(pResult, RESULT_ROW_START_INTERP);
    } else {
      setNotInterpoWindowKey(pCtxList, pQuery->numOfOutput, RESULT_ROW_START_INTERP);
    }
  }

  setNotInterpoWindowKey(pCtxList, pQuery->numOfOutput, RESULT_ROW_END_INTERP);

  int32_t i = 0;
  while (i < pQuery->numOfOutput) {
    pCtxList[i].size = 1;
    ++i;
  }
}

/*
 * Row-wise counterpart of the window end interpolation: interpolate the value at the end
 * key of `win` (ekey for ascending, skey for descending queries) between the previous row
 * and the row (ts, offset), flag the end border of pResult as interpolated, reset the start
 * keys of the contexts and set the size of every context to 0.
 */
static void setTimeWindowEKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx* pCtxList = pRuntimeEnv->pCtx;

  TSKEY endKey;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    endKey = win->ekey;
  } else {
    endKey = win->skey;
  }

  doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, endKey, RESULT_ROW_END_INTERP);
  setResultRowInterpo(pResult, RESULT_ROW_END_INTERP);

  setNotInterpoWindowKey(pCtxList, pQuery->numOfOutput, RESULT_ROW_START_INTERP);

  int32_t k = 0;
  while (k < pQuery->numOfOutput) {
    pCtxList[k].size = 0;
    ++k;
  }
}

1510
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
1511
                                  SResultRowInfo *pWindowResInfo, SArray *pDataBlock) {
1512
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
1513
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
1514

1515
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1516
  STableQueryInfo* item = pQuery->current;
H
Haojun Liao 已提交
1517

1518 1519
  int64_t groupId = item->groupIndex;

H
Haojun Liao 已提交
1520 1521 1522
  SColumnInfoData* pColumnInfoData = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);

  TSKEY  *tsCols = (pColumnInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP)? (TSKEY*) pColumnInfoData->pData:NULL;
H
Haojun Liao 已提交
1523
  bool    groupbyColumnValue = pRuntimeEnv->groupbyColumn;
1524

1525 1526
  int16_t type = 0;
  int16_t bytes = 0;
1527

1528
  char *groupbyColumnData = NULL;
H
Haojun Liao 已提交
1529
  if (groupbyColumnValue) {
1530
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
1531
  }
1532

H
Haojun Liao 已提交
1533
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
1534
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1535 1536
    char *dataBlock = getDataBlock(pRuntimeEnv, &pRuntimeEnv->sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &pRuntimeEnv->sasArray[k], k, pQInfo->vgId);
H
Haojun Liao 已提交
1537
    pCtx[k].size = 1;
1538
  }
1539

1540 1541
  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
1542
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
H
hjxilinx 已提交
1543 1544
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
1545
  }
1546

1547
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1548

1549 1550
  // from top to bottom in desc
  // from bottom to top in asc order
H
Haojun Liao 已提交
1551
  if (pRuntimeEnv->pTsBuf != NULL) {
1552
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
H
Haojun Liao 已提交
1553
           pQuery->order.order, pRuntimeEnv->pTsBuf->cur.order);
1554
  }
1555

H
hjxilinx 已提交
1556
  int32_t offset = -1;
H
Haojun Liao 已提交
1557
  TSKEY   prevTs = *(TSKEY*) pRuntimeEnv->prevRow[0];
H
Haojun Liao 已提交
1558
  int32_t prevRowIndex = -1;
1559

1560
  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
H
hjxilinx 已提交
1561
    offset = GET_COL_DATA_POS(pQuery, j, step);
1562

H
Haojun Liao 已提交
1563
    if (pRuntimeEnv->pTsBuf != NULL) {
1564 1565
      int32_t ret = doTSJoinFilter(pRuntimeEnv, offset);
      if (ret == TS_JOIN_TAG_NOT_EQUALS) {
1566
        break;
1567
      } else if (ret == TS_JOIN_TS_NOT_EQUALS) {
1568 1569
        continue;
      } else {
1570
        assert(ret == TS_JOIN_TS_EQUAL);
1571 1572
      }
    }
1573

1574
    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
1575 1576
      continue;
    }
1577

1578
    // interval window query, decide the time window according to the primary timestamp
H
Haojun Liao 已提交
1579
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
H
Haojun Liao 已提交
1580
      int32_t prevWindowIndex = curTimeWindowIndex(pWindowResInfo);
1581
      int64_t ts = tsCols[offset];
H
Haojun Liao 已提交
1582

1583
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
1584

1585
      SResultRow* pResult = NULL;
1586 1587 1588
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
      if (ret != TSDB_CODE_SUCCESS || pResult == NULL) {  // null data, too many state code
        goto _end;
1589
      }
H
Haojun Liao 已提交
1590

1591 1592
      // window start key interpolation
      if (pRuntimeEnv->timeWindowInterpo) {
H
Haojun Liao 已提交
1593 1594 1595 1596 1597
        // check for the time window end time interpolation
        int32_t curIndex = curTimeWindowIndex(pWindowResInfo);
        if (prevWindowIndex != -1 && prevWindowIndex < curIndex) {
          for (int32_t k = prevWindowIndex; k < curIndex; ++k) {
            SResultRow *pRes = pWindowResInfo->pResult[k];
1598 1599 1600 1601
            if (pRes->closed) {
              assert(resultRowInterpolated(pResult, RESULT_ROW_START_INTERP) && resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
              continue;
            }
H
Haojun Liao 已提交
1602

1603
            ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &pRes->win, masterScan, &pResult, groupId);
H
Haojun Liao 已提交
1604
            assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP));
H
Haojun Liao 已提交
1605

H
Haojun Liao 已提交
1606
            setTimeWindowEKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &pRes->win);
1607
            doRowwiseApplyFunctions(pRuntimeEnv, &pRes->win, offset);
H
Haojun Liao 已提交
1608 1609 1610
          }

          // restore current time window
1611
          ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId);
H
Haojun Liao 已提交
1612 1613
          if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
            continue;
1614 1615
          }
        }
1616

H
Haojun Liao 已提交
1617
        setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &win);
1618
      }
H
Haojun Liao 已提交
1619

1620
      doRowwiseApplyFunctions(pRuntimeEnv, &win, offset);
1621
      int32_t index = pWindowResInfo->curIndex;
1622

1623 1624
      STimeWindow nextWin = win;
      while (1) {
H
Haojun Liao 已提交
1625
        getNextTimeWindow(pQuery, &nextWin);
1626
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
H
Haojun Liao 已提交
1627
            (nextWin.ekey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
1628 1629
          break;
        }
1630

1631 1632 1633
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }
1634

1635
        // null data, failed to allocate more memory buffer
1636 1637
        int32_t code = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &nextWin, masterScan, &pResult, groupId);
        if (code != TSDB_CODE_SUCCESS || pResult == NULL) {
1638 1639
          break;
        }
1640

1641 1642
        setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &nextWin);
        doRowwiseApplyFunctions(pRuntimeEnv, &nextWin, offset);
1643
      }
1644

1645
      // restore the index, add the result row will move the index
1646 1647 1648
      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
H
Haojun Liao 已提交
1649
      if (groupbyColumnValue) {
H
hjxilinx 已提交
1650
        char *val = groupbyColumnData + bytes * offset;
1651 1652 1653
        if (isNull(val, type)) {  // ignore the null value
          continue;
        }
1654

1655
        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes, item->groupIndex);
1656
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
1657
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_APP_ERROR);
1658 1659
        }
      }
1660

1661
      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
Haojun Liao 已提交
1662
        int32_t functionId = pQuery->pExpr1[k].base.functionId;
1663 1664 1665 1666 1667
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }
1668

H
Haojun Liao 已提交
1669 1670
    prevTs = tsCols[offset];
    prevRowIndex = offset;
1671

H
Haojun Liao 已提交
1672
    if (pRuntimeEnv->pTsBuf != NULL) {
1673
      // if timestamp filter list is empty, quit current query
H
Haojun Liao 已提交
1674
      if (!tsBufNextPos(pRuntimeEnv->pTsBuf)) {
H
hjxilinx 已提交
1675
        setQueryStatus(pQuery, QUERY_COMPLETED);
1676 1677 1678 1679
        break;
      }
    }
  }
H
Haojun Liao 已提交
1680

1681
  _end:
1682
  assert(offset >= 0 && tsCols != NULL);
D
fix bug  
dapan1121 已提交
1683
  if (prevTs != INT64_MIN && prevTs != *(int64_t*)pRuntimeEnv->prevRow[0]) {
1684
    assert(prevRowIndex >= 0);
1685
    item->lastKey = prevTs + step;
H
Haojun Liao 已提交
1686 1687
  }

1688 1689 1690
  // In case of all rows in current block are not qualified
  if (pRuntimeEnv->timeWindowInterpo && prevRowIndex != -1) {
    saveDataBlockLastRow(pRuntimeEnv, pDataBlockInfo, pDataBlock, prevRowIndex);
H
Haojun Liao 已提交
1691
  }
H
Haojun Liao 已提交
1692

H
Haojun Liao 已提交
1693 1694
  if (pRuntimeEnv->pTsBuf != NULL) {
    item->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
1695
  }
1696 1697 1698
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1699
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1700
  SQuery *pQuery = pRuntimeEnv->pQuery;
1701

1702 1703
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  SResultRowInfo*  pResultRowInfo = &pRuntimeEnv->windowResInfo;
1704

H
Haojun Liao 已提交
1705
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->groupbyColumn) {
1706
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, pDataBlock);
1707
  } else {
1708
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, searchFn, pDataBlock);
1709
  }
1710

1711 1712 1713 1714
  // update the lastkey of current table for projection/aggregation query
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
  pTableQueryInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

1715
  // interval query with limit applied
1716
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1717
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyColumn) {
1718
    numOfRes = pResultRowInfo->size;
1719
    updateResultRowIndex(pResultRowInfo, pTableQueryInfo, QUERY_IS_ASC_QUERY(pQuery), pRuntimeEnv->timeWindowInterpo);
H
Haojun Liao 已提交
1720
  } else { // projection query
1721
    numOfRes = (int32_t) getNumOfResult(pRuntimeEnv);
1722

1723
    // update the number of output result
H
Haojun Liao 已提交
1724
    if (numOfRes > 0 && pQuery->checkResultBuf == 1) {
1725 1726
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1727

1728 1729 1730
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1731

1732 1733 1734
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1735

1736 1737
      if (((pTableQueryInfo->lastKey > pTableQueryInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQueryInfo->lastKey < pTableQueryInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
1738 1739
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1740
    }
1741
  }
1742

1743
  return numOfRes;
1744 1745
}

H
Haojun Liao 已提交
1746
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
H
Haojun Liao 已提交
1747
                   SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) {
1748

H
Haojun Liao 已提交
1749 1750
  int32_t functionId = pQuery->pExpr1[colIndex].base.functionId;
  int32_t colId = pQuery->pExpr1[colIndex].base.colInfo.colId;
1751

1752
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1753
  pCtx->hasNull = hasNullValue(&pQuery->pExpr1[colIndex].base.colInfo, pStatis, &tpField);
1754
  pCtx->aInputElemBuf = inputData;
1755

1756
  if (tpField != NULL) {
H
Haojun Liao 已提交
1757
    pCtx->preAggVals.isSet  = true;
1758 1759
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1760 1761 1762
  } else {
    pCtx->preAggVals.isSet = false;
  }
1763

H
Haojun Liao 已提交
1764 1765
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1766 1767
  // limit/offset query will affect this value
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1768

H
Haojun Liao 已提交
1769
  // minimum value no matter ascending/descending order query
H
Haojun Liao 已提交
1770 1771
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos: (pQuery->pos - pCtx->size + 1);
  assert(pCtx->startOffset >= 0);
H
Haojun Liao 已提交
1772

1773 1774
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
1775
    pCtx->ptsList = tsCol;
1776
  }
1777

1778 1779 1780 1781 1782
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1783
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1784
    /*
H
Haojun Liao 已提交
1785
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
H
Haojun Liao 已提交
1786
     * timestamp column, and the y-value is the column specified in pQuery->pExpr1[i].colIdxInBuffer
1787 1788 1789 1790 1791
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
1792
       pCtx->param[1].i64 = pQuery->window.skey;
1793
       pCtx->param[1].nType = TSDB_DATA_TYPE_BIGINT;
1794
       pCtx->param[2].i64 = pQuery->window.ekey;
1795
       pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
1796
    }
1797

1798 1799
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1800 1801 1802 1803 1804 1805
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1806
  } else if (functionId == TSDB_FUNC_INTERP) {
H
Haojun Liao 已提交
1807 1808 1809
    SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx);

    SInterpInfoDetail *pInterpInfo = (SInterpInfoDetail *)GET_ROWCELL_INTERBUF(pInfo);
S
TD-1057  
Shengliang Guan 已提交
1810
    pInterpInfo->type = (int8_t)pQuery->fillType;
1811 1812
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
1813

1814 1815 1816 1817
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
H
Haojun Liao 已提交
1818 1819 1820
        if (pCtx->inputType != TSDB_DATA_TYPE_BINARY && pCtx->inputType != TSDB_DATA_TYPE_NCHAR) {
          tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
        }
1821 1822
      }
    }
H
Haojun Liao 已提交
1823
  } else if (functionId == TSDB_FUNC_TS_COMP) {
1824
    pCtx->param[0].i64 = vgId;
H
Haojun Liao 已提交
1825
    pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT;
1826
  }
1827

1828 1829 1830 1831 1832 1833
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1834
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1835 1836 1837
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1838
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1839 1840 1841 1842 1843 1844
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1845
static int32_t setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
H
Haojun Liao 已提交
1846 1847
  SQuery* pQuery = pRuntimeEnv->pQuery;

1848
  if (isSelectivityWithTagsQuery(pQuery)) {
1849
    int32_t num = 0;
1850
    int16_t tagLen = 0;
1851

1852
    SQLFunctionCtx *p = NULL;
1853
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1854 1855 1856
    if (pTagCtx == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1857

1858
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
1859
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;
1860

1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1874 1875 1876 1877 1878
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
S
TD-1848  
Shengliang Guan 已提交
1879
      tfree(pTagCtx);
dengyihao's avatar
dengyihao 已提交
1880
    }
1881
  }
H
Haojun Liao 已提交
1882 1883

  return TSDB_CODE_SUCCESS;
1884 1885
}

1886
/**
 * Allocate and initialise the per-output state of the runtime environment:
 * the function contexts, the output offsets, the result-cell offsets and the
 * arithmetic support array.
 *
 * For every output expression the input type/size is resolved (tag column,
 * user-defined constant column or normal column), the output description and
 * the function parameters are copied into the context, and the offsets of the
 * output inside a result row are accumulated.
 *
 * @param pRuntimeEnv  runtime environment to set up; pQuery must be set
 * @param order        order value stored in param[2] of top/bottom/diff contexts
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 *         fails. On failure everything allocated by this function is released
 *         and the corresponding pointers are reset.
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
  pRuntimeEnv->offset = calloc(pQuery->numOfOutput, sizeof(int16_t));
  pRuntimeEnv->rowCellInfoOffset = calloc(pQuery->numOfOutput, sizeof(int32_t));
  pRuntimeEnv->sasArray = calloc(pQuery->numOfOutput, sizeof(SArithmeticSupport));

  if (pRuntimeEnv->offset == NULL || pRuntimeEnv->pCtx == NULL || pRuntimeEnv->rowCellInfoOffset == NULL || pRuntimeEnv->sasArray == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pExpr1[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the "require null" marker is moved from the column flag into the context
    if (TSDB_COL_REQ_NULL(pIndex->flag)) {
      pCtx->requireNull = true;
      pIndex->flag &= ~(TSDB_COL_NULL);
    } else {
      pCtx->requireNull = false;
    }

    // resolve the type and size of the input column
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else if (pIndex->colId == TSDB_BLOCK_DIST_COLUMN_INDEX) {
        SSchema s = tGetBlockDistColumnSchema();
        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes  = pQuery->pExpr1[i].bytes;
    pCtx->outputType   = pQuery->pExpr1[i].type;

    pCtx->order        = pQuery->order.order;
    pCtx->functionId   = pSqlFuncMsg->functionId;
    pCtx->stableQuery  = pRuntimeEnv->stableQuery;
    pCtx->interBufBytes = pQuery->pExpr1[i].interBytes;
    pCtx->start.key    = INT64_MIN;
    pCtx->end.key      = INT64_MIN;

    // copy the function arguments; string arguments are referenced through a pointer
    pCtx->numOfParams  = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pExpr1[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64 = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64 = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64 = pQuery->order.orderColId;
    }

    if (functionId == TSDB_FUNC_ARITHM) {
      pRuntimeEnv->sasArray[i].data = calloc(pQuery->numOfCols, POINTER_BYTES);
      if (pRuntimeEnv->sasArray[i].data == NULL) {
        goto _clean;
      }
    }

    // running offsets of this output inside a result row
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
      pRuntimeEnv->rowCellInfoOffset[i] = pRuntimeEnv->rowCellInfoOffset[i - 1] + sizeof(SResultRowCellInfo) + pQuery->pExpr1[i - 1].interBytes;
    }
  }

  // INT64_MIN marks "no previous row yet"
  *(int64_t*) pRuntimeEnv->prevRow[0] = INT64_MIN;

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  // fixed output query/multi-output query for normal table
  if (!pRuntimeEnv->groupbyColumn && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  if (setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx) != TSDB_CODE_SUCCESS) {
    goto _clean;
  }

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  // Release the nested allocations first: once pCtx/sasArray are freed and reset,
  // no later teardown can reach the parameter variants or the arithmetic buffers.
  // Both arrays come from calloc, so untouched entries hold zero params / NULL data.
  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }
  }

  if (pRuntimeEnv->sasArray != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      tfree(pRuntimeEnv->sasArray[i].data);
    }
  }

  tfree(pRuntimeEnv->pCtx);
  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->rowCellInfoOffset);
  tfree(pRuntimeEnv->sasArray);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

H
Haojun Liao 已提交
2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024
/*
 * Clean up the primary and the secondary tsdb query handle of a query and
 * reset both pointers. The memory reference of the query is expected to be
 * fully released at this point (checked by assertion).
 */
static void doFreeQueryHandle(SQInfo* pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;

  // clean up both handles before dropping the references to them
  tsdbCleanupQueryHandle(pEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);

  pEnv->pSecQueryHandle = NULL;
  pEnv->pQueryHandle = NULL;

  SMemRef* pMemRef = &pQInfo->memRef;
  assert(pMemRef->ref == 0 && pMemRef->imem == NULL && pMemRef->mem == NULL);
}

D
fix bug  
dapan1121 已提交
2025

2026 2027 2028 2029
/*
 * Release everything owned by the runtime environment of a query: the window
 * result info, the ts-comp output file, the function contexts with their
 * parameters and tag lists, the arithmetic support buffers, the fill info,
 * the result buffer, the query handles, the ts buffer, the offset/key/row
 * buffers, the result-row hash table and the result-row pool.
 *
 * Returns immediately when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);

  qDebug("QInfo:%p teardown runtime env", pQInfo);
  cleanupResultRowInfo(&pRuntimeEnv->windowResInfo);

  // a ts-comp query keeps a FILE* in its first output buffer; close it once
  if (isTSCompQuery(pQuery)) {
    FILE **ppFile = (FILE **)pQuery->sdata[0]->data;
    if (*ppFile != NULL) {
      fclose(*ppFile);
      *ppFile = NULL;
    }
  }

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[col];

      int32_t p = 0;
      while (p < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[p]);
        ++p;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->pCtx);
  }

  if (pRuntimeEnv->sasArray != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      tfree(pRuntimeEnv->sasArray[col].data);
    }

    tfree(pRuntimeEnv->sasArray);
  }

  pRuntimeEnv->pFillInfo = taosDestroyFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  doFreeQueryHandle(pQInfo);

  pRuntimeEnv->pTsBuf = tsBufDestroy(pRuntimeEnv->pTsBuf);

  tfree(pRuntimeEnv->offset);
  tfree(pRuntimeEnv->keyBuf);
  tfree(pRuntimeEnv->rowCellInfoOffset);
  tfree(pRuntimeEnv->prevRow);

  taosHashCleanup(pRuntimeEnv->pResultRowHashTable);
  pRuntimeEnv->pResultRowHashTable = NULL;

  pRuntimeEnv->pool = destroyResultRowPool(pRuntimeEnv->pool);
}

2088 2089 2090 2091
// True when a response context is attached to the query (rspContext is non-NULL).
static bool needBuildResAfterQueryComplete(SQInfo* pQInfo) {
  if (pQInfo->rspContext == NULL) {
    return false;
  }

  return true;
}

H
Haojun Liao 已提交
2092
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
2093

2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112
static bool isQueryKilled(SQInfo *pQInfo) {
  if (IS_QUERY_KILLED(pQInfo)) {
    return true;
  }

  // query has been executed more than tsShellActivityTimer, and the retrieve has not arrived
  // abort current query execution.
  if (pQInfo->owner != 0 && ((taosGetTimestampSec() - pQInfo->startExecTs) > getMaximumIdleDurationSec()) &&
      (!needBuildResAfterQueryComplete(pQInfo))) {

    assert(pQInfo->startExecTs != 0);
    qDebug("QInfo:%p retrieve not arrive beyond %d sec, abort current query execution, start:%"PRId64", current:%d", pQInfo, 1,
           pQInfo->startExecTs, taosGetTimestampSec());
    return true;
  }

  return false;
}

H
Haojun Liao 已提交
2113
// Mark the query as cancelled; IS_QUERY_KILLED tests for exactly this code.
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
2114

H
Haojun Liao 已提交
2115 2116 2117
/*
 * Tell whether the query produces a fixed number of output rows.
 *
 * Interval queries are never fixed output; top/bottom and group-by-column
 * queries always are. Otherwise the query is fixed output as soon as one of
 * its functions (ignoring timestamp helpers) is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyColumn) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[i].base;

    // a leading single-parameter projection of the primary timestamp column is ignored (ts_comp case)
    bool leadingTsProjection = (i == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool tsHelper = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || tsHelper) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

2147
// todo refactor with isLastRowQuery
2148
// True when at least one output expression of the query is the interp function.
bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }

    idx += 1;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
2160
// True when the query contains a sum_rate, sum_irate, avg_rate or avg_irate function.
static bool isSumAvgRateQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pExpr1[idx].base.functionId;
    idx += 1;

    // timestamp columns never qualify
    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    bool sumRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE);
    bool avgRate = (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);
    if (sumRate || avgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
2176
// True when at least one output expression of the query is the last_row function.
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pExpr1[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    idx += 1;
  }

  return false;
}

H
hjxilinx 已提交
2187
/*
 * Tell whether the data has to be scanned a second time in the opposite
 * direction: this is the case for first/first_dst in a descending query, and
 * for last/last_dst when the order stored in the function's first argument
 * differs from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pExpr1[i].base;

    switch (pFunc->functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TS_DUMMY:
      case TSDB_FUNC_TAG:
        // helper columns never require a reverse scan
        break;

      case TSDB_FUNC_FIRST:
      case TSDB_FUNC_FIRST_DST:
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          return true;
        }
        break;

      case TSDB_FUNC_LAST:
      case TSDB_FUNC_LAST_DST: {
        // the scan order to acquire the last result of the specified column
        int32_t requiredOrder = (int32_t)pFunc->arg->argValue.i64;
        if (requiredOrder != pQuery->order.order) {
          return true;
        }
        break;
      }

      default:
        break;
    }
  }

  return false;
}
H
hjxilinx 已提交
2209

H
Haojun Liao 已提交
2210 2211 2212 2213
/**
 * The following 4 kinds of query are treated as the tags query
 * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query
 */
H
hjxilinx 已提交
2214 2215
static bool onlyQueryTags(SQuery* pQuery) {
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2216
    SExprInfo* pExprInfo = &pQuery->pExpr1[i];
H
Haojun Liao 已提交
2217 2218

    int32_t functionId = pExprInfo->base.functionId;
H
Haojun Liao 已提交
2219 2220 2221 2222

    if (functionId != TSDB_FUNC_TAGPRJ &&
        functionId != TSDB_FUNC_TID_TAG &&
        (!(functionId == TSDB_FUNC_COUNT && pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) &&
Y
TD-1230  
yihaoDeng 已提交
2223
        (!(functionId == TSDB_FUNC_PRJ && TSDB_COL_IS_UD_COL(pExprInfo->base.colInfo.flag)))) {
H
hjxilinx 已提交
2224 2225 2226
      return false;
    }
  }
2227

H
hjxilinx 已提交
2228 2229 2230
  return true;
}

2231 2232
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
2233
/*
 * Calculate the time window that covers the given key.
 *
 * win->skey is the key truncated by taosTimeTruncate() according to the interval setting of the
 * query, and win->ekey is the last timestamp that belongs to the same window.
 * The key must be in the range of [keyFirst, keyLast], and the sliding value must not exceed the
 * interval value.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->interval.sliding <= pQuery->interval.interval);

  int64_t span = pQuery->interval.interval;
  win->skey = taosTimeTruncate(key, &pQuery->interval, pQuery->precision);

  /*
   * If keyFirst > INT64_MAX - interval, the query duration between keyFirst and keyLast must be
   * less than one interval. Therefore, the end key is clamped to INT64_MAX and no further
   * adjustment is needed.
   */
  if (keyFirst > (INT64_MAX - span)) {
    assert(keyLast - keyFirst < span);
    win->ekey = INT64_MAX;
    return;
  }

  // the 'n'/'y' interval units are handled by taosTimeAdd() instead of a plain addition
  if (pQuery->interval.intervalUnit == 'n' || pQuery->interval.intervalUnit == 'y') {
    win->ekey = taosTimeAdd(win->skey, span, pQuery->interval.intervalUnit, pQuery->precision) - 1;
    return;
  }

  win->ekey = win->skey + span - 1;
}

/*
 * Set pQuery->checkResultBuf.
 *
 * The flag is cleared for top/bottom queries and for group by normal column queries. Otherwise,
 * it is set only if every output expression, ts/ts_dummy excluded, is flagged as multi-output in
 * aAggs[].nStatus.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkResultBuf = 0;
    return;
  }

  bool multiOutput = false;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!multiOutput) {  // one single-output expression is enough to clear the flag
      break;
    }
  }

  pQuery->checkResultBuf = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
2277
bool colIdCheck(SQuery *pQuery) {
2278 2279
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
2280
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
2281
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
2282 2283 2284
      return false;
    }
  }
2285

2286 2287 2288 2289 2290 2291
  return true;
}

// Check if all output expressions, the ts/ts_dummy/tag/tag_dummy expressions excluded, are
// either functId or functIdDst.
// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which the scan order
// does not matter
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pExpr1[i].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG ||
                      fid == TSDB_FUNC_TAG_DUMMY);
    bool expected = (fid == functId || fid == functIdDst);

    if (!auxiliary && !expected) {
      return false;
    }
  }

  return true;
}

// true if only first/first_dst expressions (besides the ts/tag expressions) exist in the query
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// true if only last/last_dst expressions (besides the ts/tag expressions) exist in the query
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
2312
// todo refactor, add iterator
2313 2314
static void doExchangeTimeWindow(SQInfo* pQInfo, STimeWindow* win) {
  size_t t = taosArrayGetSize(pQInfo->tableGroupInfo.pGroupList);
H
Haojun Liao 已提交
2315
  for(int32_t i = 0; i < t; ++i) {
2316
    SArray* p1 = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);
H
Haojun Liao 已提交
2317 2318 2319

    size_t len = taosArrayGetSize(p1);
    for(int32_t j = 0; j < len; ++j) {
2320
      STableKeyInfo* pInfo = taosArrayGet(p1, j);
H
Haojun Liao 已提交
2321

2322 2323 2324 2325
      // update the new lastkey if it is equalled to the value of the old skey
      if (pInfo->lastKey == win->ekey) {
        pInfo->lastKey = win->skey;
      }
H
Haojun Liao 已提交
2326 2327 2328 2329
    }
  }
}

2330
/*
 * Switch the scan order of the query to newOrder.
 *
 * If the current order differs from newOrder, the change is logged and the skey/ekey of the query
 * window are exchanged. When exchangeLastKey is true, the lastKey of the tables is updated as
 * well by doExchangeTimeWindow(). The order of the query is set to newOrder in any case.
 *
 * queryType is only employed as the label in the log.
 */
static void doSwitchScanOrder(SQInfo *pQInfo, const char *queryType, int32_t newOrder, bool exchangeLastKey) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool isAsc = QUERY_IS_ASC_QUERY(pQuery);
  bool toAsc = (newOrder == TSDB_ORDER_ASC);

  if (isAsc != toAsc) {
    qDebug("QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
           "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64,
           pQInfo, queryType, pQuery->order.order, newOrder, pQuery->window.skey, pQuery->window.ekey,
           pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    if (exchangeLastKey) {
      doExchangeTimeWindow(pQInfo, &pQuery->window);
    }
  }

  pQuery->order.order = newOrder;
}

/*
 * Adjust the scan order (and the query window along with it) for the queries of which the result
 * can be generated by scanning the data in one specific order:
 *   - first/last row query: ascending order
 *   - group by normal column query in descending order: ascending order
 *   - point interpolation query without interval: ascending order
 *   - query with first expressions only: ascending order
 *   - query with last expressions only: descending order
 * For interval query, the first/last adjustment is applied only if stableQuery is true.
 *
 * The owner pQInfo is reported in the log directly, since GET_QINFO_ADDR() must not be applied to
 * an SQuery pointer.
 *
 * todo handle the case that the order irrelevant query type mixed up with order critical query type
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // ascending order scan for last_row query; the lastKey of tables is not touched here
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order, TSDB_ORDER_ASC);

    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    return;
  }

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) && pQuery->order.order == TSDB_ORDER_DESC) {
    pQuery->order.order = TSDB_ORDER_ASC;
    if (pQuery->window.skey > pQuery->window.ekey) {
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    doExchangeTimeWindow(pQInfo, &pQuery->window);
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->interval.interval == 0) {
    doSwitchScanOrder(pQInfo, "interp", TSDB_ORDER_ASC, false);
    return;
  }

  bool intervalQuery = (pQuery->interval.interval != 0);
  if (intervalQuery && !stableQuery) {  // interval query on normal table, keep the order
    return;
  }

  if (onlyFirstQuery(pQuery)) {
    doSwitchScanOrder(pQInfo, intervalQuery ? "only-first stable" : "only-first", TSDB_ORDER_ASC, true);
  } else if (onlyLastQuery(pQuery)) {
    doSwitchScanOrder(pQInfo, intervalQuery ? "only-last stable" : "only-last", TSDB_ORDER_DESC, true);
  }
}

/*
 * Get the initial number of pages:
 *   - group by normal column query: 128
 *   - time window query: one page for each table, and 16 at least
 *   - others: 1
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t numOfPages = 1;  // for super table query, one page for each subset: pQInfo->pSidSet->numOfSubSet;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = (int32_t)(MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE));
  }

  assert(numOfPages > 0);
  return numOfPages;
}

2440 2441
/*
 * Calculate the row size (*rowsize) and the page size (*ps) of the intermediate result buffer,
 * and set pRuntimeEnv->numOfRowsPerPage accordingly.
 *
 * The page size starts from DEFAULT_INTERN_BUF_PAGE_SIZE and is doubled until one page, the
 * tFilePage header excluded, accommodates MIN_ROWS_PER_PAGE rows at least.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t MIN_ROWS_PER_PAGE = 4;
  int32_t overhead = sizeof(tFilePage);

  *rowsize = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));

  // one page contains at least MIN_ROWS_PER_PAGE rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * MIN_ROWS_PER_PAGE) > (*ps) - overhead; *ps = (*ps << 1u)) {
    // enlarge the page only
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
  assert(pRuntimeEnv->numOfRowsPerPage <= MAX_ROWS_PER_RESBUF_PAGE);
}

H
Haojun Liao 已提交
2457
// true for every data type other than binary/nchar; the pre-filter on block statistics is not
// supported for these two types
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
2458

H
Haojun Liao 已提交
2459 2460 2461 2462
/*
 * Check by the block statistics if the data block has to be loaded.
 *
 * The block is required if:
 *   - no statistics are available, or neither a filter column nor a top/bottom expression exists;
 *   - the statistics of a filter column are missing, or the column is binary/nchar;
 *   - the filter column holds NULL values only, and an isNull filter exists on it;
 *   - any filter of a column accepts the [min, max] range of the column in this block.
 * If no filter asks for the block, the decision is left to topbot_datablock_filter() for the
 * top/bottom query; otherwise false is returned.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery)) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    // look up the statistics of the filter column
    SDataStatis* pColStatis = NULL;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pFilterInfo->info.colId) {
        pColStatis = &pDataStatis[c];
        break;
      }
    }

    // no statistics data, load the true data block
    if (pColStatis == NULL) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
      return true;
    }

    // all data in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      // if isNULL query exists, load the null data column
      for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
        if (pFilterInfo->pFilters[j].fp == isNullOperator) {
          return true;
        }
      }

      continue;
    }

    // for float column, the min/max values are read as double and narrowed to float for the filter
    float minval = 0;
    float maxval = 0;
    char* pMin = (char *)&pColStatis->min;
    char* pMax = (char *)&pColStatis->max;

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
      minval = (float)(*(double *)(&pColStatis->min));
      maxval = (float)(*(double *)(&pColStatis->max));
      pMin = (char *)&minval;
      pMax = (char *)&maxval;
    }

    for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
      SColumnFilterElem *pElem = &pFilterInfo->pFilters[i];
      if (pElem->fp(pElem, pMin, pMax, pFilterInfo->info.type)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pExpr1[i].base.functionId;
      if (functionId != TSDB_FUNC_TOP && functionId != TSDB_FUNC_BOTTOM) {
        continue;
      }

      // NOTE(review): pDataStatis is indexed by the index of the output expression here, while it
      // is searched by column id above -- confirm that both indices refer to the same column.
      return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
2533 2534 2535 2536 2537 2538 2539 2540
/*
 * Check if a boundary of any time window falls inside the range of the data block, i.e., the
 * block is not contained by one single time window.
 *
 * The first time window is the one that covers the start key of the block for the ascending
 * order query (the end key for the descending order query); the following windows are produced by
 * getNextTimeWindow() until they move out of the range of the block.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  TSKEY blockSKey = pBlockInfo->window.skey;
  TSKEY blockEKey = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockSKey, sk, ek, &w);
    assert(w.ekey >= blockSKey);

    // the first window is closed before the end of the block
    if (w.ekey < blockEKey) {
      return true;
    }

    for (;;) {
      getNextTimeWindow(pQuery, &w);
      if (w.skey > blockEKey) {
        return false;
      }

      // here w.skey <= blockEKey holds, only the start key of the block needs to be checked
      assert(w.ekey > blockEKey);
      if (w.skey > blockSKey) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, blockEKey, sk, ek, &w);
  assert(w.skey <= blockEKey);

  // the first window starts after the start of the block
  if (w.skey > blockSKey) {
    return true;
  }

  for (;;) {
    getNextTimeWindow(pQuery, &w);
    if (w.ekey < blockSKey) {
      return false;
    }

    // here w.ekey >= blockSKey holds, only the end key of the block needs to be checked
    assert(w.skey < blockSKey);
    if (w.ekey < blockEKey) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2582
/*
 * Decide which part of the current data block is required by the query, and load it.
 *
 * The value of *status on return:
 *   BLK_DATA_NO_NEEDED     - the block is not required, nothing is loaded
 *   BLK_DATA_STATIS_NEEDED - the block statistics are loaded into *pStatis; if the statistics do
 *                            not exist, the block data is loaded into *pDataBlock instead
 *   BLK_DATA_ALL_NEEDED    - both the statistics and the block data are loaded
 *   BLK_DATA_DISCARD       - the block is ruled out by needToLoadDataBlock(); the block data is
 *                            not loaded, and *pDataBlock is left untouched
 *
 * Return TSDB_CODE_SUCCESS, or terrno if the block data can not be retrieved.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) {
  *status = BLK_DATA_NO_NEEDED;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  int64_t groupId = pQuery->current->groupIndex;

  SQueryCostInfo* pCost = &pRuntimeEnv->summary;

  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL) {
    *status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      *status = BLK_DATA_ALL_NEEDED;
    }

    if ((*status) != BLK_DATA_ALL_NEEDED) {
      // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
      // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        SResultRow* pResult = NULL;

        bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

        TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey;
        STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery);
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, &win, masterScan, &pResult, groupId) != TSDB_CODE_SUCCESS) {
          // todo handle error in set result for timewindow
        }
      }

      // accumulate the requirement of each output expression, stop once the whole block is required
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pExpr1[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;
        (*status) |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if (((*status) & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if ((*status) == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pCost->discardBlocks += 1;
  } else if ((*status) == BLK_DATA_STATIS_NEEDED) {

    // this function never returns error?
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);
    pCost->loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pCost->totalCheckedRows += pBlockInfo->rows;

      // report the failure in the same way as the load of the complete block below
      if (*pDataBlock == NULL) {
        return terrno;
      }
    }
  } else {
    assert((*status) == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pCost->loadBlockStatis += 1;
    tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis);

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pCost->discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      (*status) = BLK_DATA_DISCARD;

      // the discarded block must not be loaded, nor counted as a loaded/checked block
      return TSDB_CODE_SUCCESS;
    }

    pCost->totalCheckedRows += pBlockInfo->rows;
    pCost->loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
    if (*pDataBlock == NULL) {
      return terrno;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2668
/*
 * Binary search for the key in the timestamp list, which is expected to be sorted in ascending
 * order.
 *
 * pValue: the list of TSKEY, num elements
 * order:  TSDB_ORDER_DESC - return the position of the last element that is not greater than the
 *                           key, and -1 if the key is less than the first element
 *         TSDB_ORDER_ASC  - return the position of the first element that is not less than the
 *                           key, and -1 if the key is greater than the last element
 *
 * -1 is also returned if num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }

      // keys[lo] < key < keys[hi] holds here
      int32_t mid = lo + ((hi - lo + 1) >> 1);
      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }

    if (key == keys[hi]) {
      return hi;
    }

    if (key > keys[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    // keys[lo] < key < keys[hi] holds here
    int32_t mid = lo + ((hi - lo + 1) >> 1);
    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2731 2732 2733 2734 2735 2736 2737 2738
/*
 * Reallocate the output buffer of each output expression to hold capacity rows, and reset the
 * output position of each function context to the start of the buffer.
 *
 * Nothing is done if capacity is less than the current capacity. If the memory can not be
 * allocated, the execution is aborted by longjmp with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pExpr1[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // the size is calculated in size_t to avoid the overflow of the 32-bit multiplication
    size_t size = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], size);
    if (tmp == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2759
// TODO merge with enuserOutputBufferSimple
2760 2761 2762
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  SQuery* pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2763
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyColumn && !isFixedOutputQuery(pRuntimeEnv) && !isTSCompQuery(pQuery)) {
2764
    SResultRec *pRec = &pQuery->rec;
2765

2766
    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
S
TD-1057  
Shengliang Guan 已提交
2767 2768
      int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
      int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));
2769

2770
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
H
Haojun Liao 已提交
2771
        int32_t bytes = pQuery->pExpr1[i].bytes;
H
Haojun Liao 已提交
2772 2773
        assert(bytes > 0 && newSize > 0);

2774
        char *tmp = realloc(pQuery->sdata[i], bytes * newSize + sizeof(tFilePage));
H
Haojun Liao 已提交
2775
        if (tmp == NULL) {
H
Haojun Liao 已提交
2776
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
2777
        } else {
2778
          memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)((newSize - pRec->rows) * bytes));
2779 2780
          pQuery->sdata[i] = (tFilePage *)tmp;
        }
H
Haojun Liao 已提交
2781

2782 2783
        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;
H
Haojun Liao 已提交
2784

H
Haojun Liao 已提交
2785
        int32_t functionId = pQuery->pExpr1[i].base.functionId;
2786 2787 2788 2789
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }
H
Haojun Liao 已提交
2790

2791
      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
2792
             newSize, pRec->capacity, newSize - pRec->rows);
2793

2794 2795 2796 2797 2798
      pRec->capacity = newSize;
    }
  }
}

2799 2800 2801 2802 2803
/*
 * Set the start key of the first time window (windowResInfo.prevSKey) by the first data block.
 * Only the interval query is handled, and only if prevSKey still holds TSKEY_INITIAL_VAL.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow w = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &w);
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &w);
  }

  pWindowResInfo->prevSKey = w.skey;
}

2816 2817
/*
 * Scan the data blocks of the current table one by one, and apply the functions on each block
 * that is loaded.
 *
 * The scan is stopped if the load of a block fails, or the status of the query turns to
 * QUERY_RESBUF_FULL/QUERY_COMPLETED. The execution is aborted by longjmp if the query is killed or
 * terrno is set after the scan. The query is marked as completed unless the result buffer is
 * full, and all result rows are closed for the interval query.
 *
 * Always return 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo * pCost = &pRuntimeEnv->summary;
  STableQueryInfo *pCurrent = pQuery->current;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pCurrent->win.skey, pCurrent->win.ekey, pCurrent->lastKey,
         pQuery->order.order);

  // the master scan and the supplementary scan employ different query handles
  TsdbQueryHandleT handle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  }

  int32_t        step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray *     pDataBlock = NULL;
    uint32_t     status = 0;

    int32_t code = loadDataBlockOnDemand(pRuntimeEnv, &pRuntimeEnv->windowResInfo, handle, &blockInfo, &pStatis, &pDataBlock, &status);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // skip the block, and move the lastKey right beyond the block in the scan direction
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    closeAllResultRows(&pRuntimeEnv->windowResInfo);
    pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2892
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2893
  tVariantDestroy(tag);
2894

2895
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2896
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2897
    assert(val != NULL);
2898

H
[td-90]  
Haojun Liao 已提交
2899
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2900
  } else {
2901
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2902 2903 2904 2905
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
2906

H
hjxilinx 已提交
2907
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2908
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2909 2910 2911 2912
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2913
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2914
    } else {
H
Haojun Liao 已提交
2915 2916 2917 2918 2919
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2920
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2921
    }
2922
  }
2923 2924
}

2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936
// Find the tag column by the column id in the tag column list of numOfTags entries.
// Return NULL if no column matches.
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t i = 0;
  while (i < numOfTags) {
    SColumnInfo* pCol = &pTagColList[i++];
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2937
/*
 * Load the tag values of pTable into the function contexts (pRuntimeEnv->pCtx[i].tag).
 *
 * Two cases:
 *  - the query has exactly one output, it is TSDB_FUNC_TS_COMP and this is a super table query:
 *    only the tag column named by the first function argument is loaded, into pCtx[0].tag;
 *  - otherwise the tag value is loaded for every output expression flagged as a tag column.
 *    In addition, when a ts buffer exists and the first output is a TS/PRJ function on the
 *    primary timestamp column, the tag column named by its argument is loaded into pCtx[0].tag
 *    (the join tag) and logged.
 *
 * The tag column named by a function argument must exist in pQuery->tagColList; this is
 * asserted because the lookup returns NULL for an unknown column id.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pExprInfo = &pQuery->pExpr1[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP && pRuntimeEnv->stableQuery) {
    assert(pExprInfo->base.numOfParams == 1);

    int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
    assert(pColInfo != NULL);  // would otherwise be dereferenced as NULL below

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
  } else {
    // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo* pLocalExprInfo = &pQuery->pExpr1[idx];

      // ts_comp column required the tag value for join filter
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        continue;
      }

      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }

    // set the join tag for first column
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTsBuf != NULL &&
        pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      assert(pFuncMsg->numOfParams == 1);

      int16_t      tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
      SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
      assert(pColInfo != NULL);  // would otherwise be dereferenced as NULL below

      doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);

      // the variant payload is printed as a string or as an integer depending on the loaded type
      int16_t tagType = pRuntimeEnv->pCtx[0].tag.nType;
      if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) {
        qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo,
               pExprInfo->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz);
      } else {
        qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo,
               pExprInfo->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64);
      }
    }
  }
}

2987
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

3056
/*
 * Debug helper: dump numOfRows rows of intermediate super table results to stdout.
 *
 * pdata[i] holds the values of output column i; the cell of row j starts at
 * pdata[i]->data + pExpr1[i].bytes * j. Binary cells are decoded by printBinaryData();
 * column types other than those handled below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      int32_t type = pQuery->pExpr1[col].type;

      if (type == TSDB_DATA_TYPE_BINARY) {
        printBinaryData(pQuery->pExpr1[col].base.functionId, pdata[col]->data + pQuery->pExpr1[col].bytes * row,
                        type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)(pdata[col]->data + pQuery->pExpr1[col].bytes * row));
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)(pdata[col]->data + pQuery->pExpr1[col].bytes * row));
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)(pdata[col]->data + pQuery->pExpr1[col].bytes * row));
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)(pdata[col]->data + pQuery->pExpr1[col].bytes * row));
      }
    }

    // one output line per result row
    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
3091
  STableQueryInfo **pTableQueryInfo;
3092 3093
  int32_t          *rowIndex;
  int32_t           order;
3094 3095 3096
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
3097
  int32_t left  = *(int32_t *)pLeft;
3098
  int32_t right = *(int32_t *)pRight;
3099

3100
  SCompSupporter *  supporter = (SCompSupporter *)param;
3101

3102 3103
  int32_t leftPos  = supporter->rowIndex[left];
  int32_t rightPos = supporter->rowIndex[right];
3104

3105 3106 3107 3108
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
3109

3110 3111 3112 3113
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
3114

3115
  STableQueryInfo** pList = supporter->pTableQueryInfo;
3116

3117 3118 3119
  SResultRowInfo *pWindowResInfo1 = &(pList[left]->windowResInfo);
  SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos);
  TSKEY leftTimestamp = pWindowRes1->win.skey;
3120

3121
  SResultRowInfo *pWindowResInfo2 = &(pList[right]->windowResInfo);
3122
  SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos);
3123
  TSKEY rightTimestamp = pWindowRes2->win.skey;
3124

3125 3126 3127
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
3128

3129 3130 3131 3132 3133
  if (supporter->order == TSDB_ORDER_ASC) {
    return (leftTimestamp > rightTimestamp)? 1:-1;
  } else {
    return (leftTimestamp < rightTimestamp)? 1:-1;
  }
3134 3135
}

3136
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group yields at least one
 * row in pQInfo->groupResInfo.pRows or all groups have been consumed.
 *
 * groupIndex is advanced past every group that was merged. When every group has been consumed
 * and the row list is empty, the super table query is flagged as over. The elapsed time is
 * added to summary.firstStageMergeTime.
 *
 * Returns TSDB_CODE_SUCCESS, or the error code of mergeIntoGroupResultImpl().
 */
int32_t mergeGroupResult(SQInfo *pQInfo) {
  int64_t st = taosGetTimestampUs();

  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (;;) {
    if (pQInfo->groupIndex >= numOfGroups) {
      break;
    }

    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t ret = mergeIntoGroupResultImpl(pGroupResInfo, group, pQInfo);
    if (ret != TSDB_CODE_SUCCESS) {
      return ret;
    }

    // the group counts as consumed even when it produced rows
    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (taosArrayGetSize(pGroupResInfo->pRows) > 0) {
      break;
    }

    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
    taosArrayClear(pGroupResInfo->pRows);

    pGroupResInfo->index = 0;
    pGroupResInfo->rowId = 0;
  }

  if (pQInfo->groupIndex == numOfGroups && taosArrayGetSize(pGroupResInfo->pRows) == 0) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  int64_t elapsedTime = taosGetTimestampUs() - st;
  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "us", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsedTime);

  pQInfo->runtimeEnv.summary.firstStageMergeTime += elapsedTime;
  return TSDB_CODE_SUCCESS;
}

3175 3176
static int32_t doCopyToSData(SQInfo *pQInfo, SResultRow **pRows, int32_t numOfRows, int32_t* index, int32_t orderType);

3177
/*
 * Copy rows of the current merged group into the query output buffers and store the value
 * returned by doCopyToSData() in pQuery->rec.rows.
 *
 * When the read cursor (groupResInfo.index) has reached the end of the merged rows, the row
 * list is reset and the next group(s) are merged first. Returns without copying when that
 * merge fails, or when no rows are left and all groups are consumed (the query is then
 * flagged as over).
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  SGroupResInfo* pGroupResInfo = &pQInfo->groupResInfo;

  bool groupDrained = (pGroupResInfo->index >= taosArrayGetSize(pGroupResInfo->pRows));
  if (groupDrained) {
    // current results of group has been sent to client, try next group
    pGroupResInfo->index = 0;
    pGroupResInfo->rowId = 0;
    taosArrayClear(pGroupResInfo->pRows);

    if (mergeGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (taosArrayGetSize(pGroupResInfo->pRows) == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  int32_t numOfRows = (int32_t) taosArrayGetSize(pGroupResInfo->pRows);
  pQuery->rec.rows =
      doCopyToSData(pQInfo, pGroupResInfo->pRows->pData, numOfRows, &pGroupResInfo->index, TSDB_ORDER_ASC);
}

3203 3204 3205
/*
 * Number of results held by pResultRow: the numOfRes of the first output cell that has a
 * positive count, or 0 when no considered cell has one.
 *
 * ts, tag and tagprj outputs are skipped, since they can not decide the output number of
 * the query; it is decided by the main output.
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pExpr1[col].base.functionId;

    bool isAuxOutput =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (!isAuxOutput) {
      SResultRowCellInfo *pResultInfo = getResultCell(pRuntimeEnv, pResultRow, col);
      assert(pResultInfo != NULL);

      if (pResultInfo->numOfRes > 0) {
        return pResultInfo->numOfRes;
      }
    }
  }

  return 0;
}

3228
/*
 * Merge the result rows of all tables of one group into pGroupResInfo->pRows.
 *
 * Tables that hold at least one result row become the sources of a loser tree ordered by
 * tableResultComparFn(). Rows are visited in that order; a row pointer is appended to pRows
 * when getNumOfResultWindowRes() reports a positive count for it and its start key differs
 * from the previously visited one. The count is stored in pWindowRes->numOfRows for appended
 * rows. pRows is created on first use.
 *
 * Returns TSDB_CODE_SUCCESS (also when no table of the group has results),
 * TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation or the loser tree creation fails, or
 * TSDB_CODE_TSC_QUERY_CANCELLED when the query was killed during the merge.
 */
int32_t mergeIntoGroupResultImpl(SGroupResInfo* pGroupResInfo, SArray *pTableList, SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  bool ascQuery = QUERY_IS_ASC_QUERY(pRuntimeEnv->pQuery);

  int32_t code = TSDB_CODE_SUCCESS;

  int32_t *posList = NULL;
  SLoserTreeInfo *pTree = NULL;
  STableQueryInfo **pTableQueryInfoList = NULL;

  size_t size = taosArrayGetSize(pTableList);
  if (pGroupResInfo->pRows == NULL) {
    pGroupResInfo->pRows = taosArrayInit(100, POINTER_BYTES);
  }

  // posList[k]: index of the current row of source k; calloc starts every source at row 0
  posList = calloc(size, sizeof(int32_t));
  pTableQueryInfoList = malloc(POINTER_BYTES * size);

  if (pTableQueryInfoList == NULL || posList == NULL || pGroupResInfo->pRows == NULL) {
    qError("QInfo:%p failed alloc memory", pQInfo);
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _end;
  }

  // only tables holding at least one result row take part in the merge
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pTableList, i);
    if (item->windowResInfo.size > 0) {
      pTableQueryInfoList[numOfTables++] = item;
    }
  }

  // there is no data in current group, nothing to merge
  if (numOfTables == 0) {
    goto _end;
  }

  SCompSupporter cs = {pTableQueryInfoList, posList, pRuntimeEnv->pQuery->order.order};

  int32_t ret = tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (ret != TSDB_CODE_SUCCESS) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _end;
  }

  int64_t lastTimestamp = ascQuery? INT64_MIN:INT64_MAX;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    if (isQueryKilled(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);
      code = TSDB_CODE_TSC_QUERY_CANCELLED;
      goto _end;
    }

    // the tree root names the source whose current row comes next
    int32_t tableIndex = pTree->pNode[0].index;

    SResultRowInfo *pWindowResInfo = &pTableQueryInfoList[tableIndex]->windowResInfo;
    SResultRow  *pWindowRes = getResultRow(pWindowResInfo, cs.rowIndex[tableIndex]);

    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num <= 0) {
      // empty row: skip it without touching lastTimestamp
      cs.rowIndex[tableIndex] += 1;

      if (cs.rowIndex[tableIndex] >= pWindowResInfo->size) {
        cs.rowIndex[tableIndex] = -1;
        if (--numOfTables == 0) { // all input sources are exhausted
          break;
        }
      }
    } else {
      assert((pWindowRes->win.skey >= lastTimestamp && ascQuery) || (pWindowRes->win.skey <= lastTimestamp && !ascQuery));

      // rows sharing the start key of the previous row are not emitted again
      if (pWindowRes->win.skey != lastTimestamp) {
        taosArrayPush(pGroupResInfo->pRows, &pWindowRes);
        pWindowRes->numOfRows = (uint32_t) num;
      }

      lastTimestamp = pWindowRes->win.skey;

      // move to the next row of current entry
      if ((++cs.rowIndex[tableIndex]) >= pWindowResInfo->size) {
        cs.rowIndex[tableIndex] = -1;

        // all input sources are exhausted
        if ((--numOfTables) == 0) {
          break;
        }
      }
    }

    tLoserTreeAdjust(pTree, tableIndex + pTree->numOfEntries);
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  {
    // pQuery was not declared in this function, which broke _DEBUG_VIEW builds
    SQuery *pQuery = pRuntimeEnv->pQuery;
    displayInterResult(pQuery->sdata, pRuntimeEnv, (int32_t)pQuery->sdata[0]->num);
  }
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  _end:
  tfree(pTableQueryInfoList);
  tfree(posList);
  tfree(pTree);

  return code;
}

3339 3340 3341 3342
/*
 * Prepare one table for the reverse scan: swap the ends of its time window, restart lastKey
 * at the new window start, flip the cursor order, reset the cursor vgroup index and point
 * the result row cursor at the last existing row. A NULL pTableQueryInfo is ignored.
 * pQuery is not used.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    // scan window runs the other way round now
    SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

    SWITCH_ORDER(pTableQueryInfo->cur.order);
    pTableQueryInfo->cur.vgroupIndex = -1;

    // set the index at the end of time window
    pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
  }
}

H
Haojun Liao 已提交
3354
/*
 * For every closed result row in pWindowResInfo, set the `complete` flag of each output cell
 * ahead of the reverse scan:
 *  - first/first_dst with ascending `order`, and last/last_dst with descending `order`,
 *    are marked not complete;
 *  - ts and tag outputs are left untouched;
 *  - every other function is marked complete.
 * Rows that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  for (int32_t rowIdx = 0; rowIdx < pWindowResInfo->size; ++rowIdx) {
    bool closed = getResultRowStatus(pWindowResInfo, rowIdx);
    if (closed) {
      SResultRow *pRow = getResultRow(pWindowResInfo, rowIdx);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t functId = pQuery->pExpr1[col].base.functionId;
        SResultRowCellInfo* pInfo = getResultCell(pRuntimeEnv, pRow, col);

        bool firstOnAsc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
        bool lastOnDesc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

        if (firstOnAsc || lastOnDesc) {
          pInfo->complete = false;
        } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
          pInfo->complete = true;
        }
      }
    }
  }
}

3380 3381
/*
 * Set the `complete` flag of the output functions ahead of the reverse scan.
 *
 * Group-by-column and interval queries are handled per result row by
 * disableFuncInReverseScanImpl(). Otherwise the flag is set on the resultInfo of each
 * function context: first/first_dst (ascending order) and last/last_dst (descending order)
 * become not complete, ts/tag are left alone, all other functions become complete.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pRuntimeEnv, pWindowResInfo, order);
    return;
  }

  // for simple result of table query,
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
    int32_t functId = pQuery->pExpr1[col].base.functionId;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    if (pCtx->resultInfo == NULL) {
      continue; // resultInfo is NULL, means no data checked in previous scan
    }

    bool firstOnAsc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
    bool lastOnDesc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

    if (firstOnAsc || lastOnDesc) {
      pCtx->resultInfo->complete = false;
    } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Switch every table of every group to its reverse-scan range and copy the resulting lastKey
 * into the matching STableKeyInfo of pQInfo->tableGroupInfo.pGroupList.
 *
 * Entry j of group i in the query-info list and in the key list must refer to the same
 * table; this is asserted.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t i = 0; i < numOfGroups; ++i) {
    SArray *group = GET_TABLEGROUP(pQInfo, i);
    SArray *tableKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t t = taosArrayGetSize(group);
    for (int32_t j = 0; j < t; ++j) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(group, j);
      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // update the last key in tableKeyInfo list, the tableKeyInfo is used to build the tsdbQueryHandle and decide
      // the start check timestamp of tsdbQueryHandle
      STableKeyInfo *pTableKeyInfo = taosArrayGet(tableKeyGroup, j);
      pTableKeyInfo->lastKey = pCheckInfo->lastKey;

      assert(pCheckInfo->pTable == pTableKeyInfo->pTable);
    }
  }
}

3431
/* Apply SWITCH_ORDER to the `order` field of the function context of every output column. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
    ++col;
  }
}

3438
/*
 * Initialise a result row: pCellInfo is pointed at the memory directly behind the SResultRow
 * header, and pageId/rowId are set to -1. Always returns TSDB_CODE_SUCCESS.
 */
int32_t initResultRow(SResultRow *pResultRow) {
  pResultRow->pageId = -1;
  pResultRow->rowId  = -1;

  // the cell info array starts sizeof(SResultRow) bytes after the row itself
  pResultRow->pCellInfo = (SResultRowCellInfo*)(pResultRow + 1);

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
3445
/*
 * Rewind the output of the default result row (looked up with a zero tid key and uid 0).
 *
 * For every output column: the context output buffer is pointed back at the start of
 * pQuery->sdata[i]->data, its result cell is reset and attached to the context, top/bottom/diff
 * get the timestamp output buffer of column 0, and the whole data buffer
 * (bytes * rec.capacity) is zeroed. initCtxOutputBuf() is called afterwards.
 */
void resetDefaultResInfoOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t tid = 0;
  int64_t uid = 0;
  SResultRow* pRow = doPrepareResultRowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&tid, sizeof(tid), true, uid);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    SResultRowCellInfo* pCellInfo = getResultCell(pRuntimeEnv, pRow, col);
    RESET_RESULT_INFO(pCellInfo);
    pCtx->resultInfo = pCellInfo;

    // set the timestamp output buffer for top/bottom/diff query
    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    bool needsTsOutput =
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needsTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)(pQuery->pExpr1[col].bytes * pQuery->rec.capacity));
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of every function context by `output` rows and reset its
 * result info.
 *
 * aOutputBuf moves by outputBytes * output for functions whose status has the outer-forward
 * flag; top/bottom additionally move ptsOutputBuf by TSDB_KEYSIZE * output. The diff function
 * must not appear here (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pRuntimeEnv->pCtx[col].aOutputBuf += pRuntimeEnv->pCtx[col].outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    bool isTopBottom = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pRuntimeEnv->pCtx[col].ptsOutputBuf = (char*)pRuntimeEnv->pCtx[col].ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pRuntimeEnv->pCtx[col].resultInfo);
  }
}

/*
 * Set currentStage of every function context to 0 and run the function's init callback for
 * each context whose result info is not yet flagged as initialized.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    pRuntimeEnv->pCtx[col].currentStage = 0;

    SResultRowCellInfo* pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(&pRuntimeEnv->pCtx[col]);
    }
  }
}

3520
/*
 * Apply the remaining query offset (pQuery->limit.offset) to the rows currently buffered.
 *
 * Nothing happens when there are no rows or no offset. When the offset covers all buffered
 * rows they are all dropped: the offset shrinks by that count, the output buffers are reset
 * and the buffer-full status is cleared. Otherwise the first `offset` rows are removed by
 * moving the remaining rows to the front of each column buffer, the offset becomes 0, and
 * the context output pointers are placed behind the surviving rows.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // every buffered row falls inside the offset: drop them all
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only the leading rows are skipped, the rest is kept
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pExpr1[col].base.functionId;
    int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

    // source and destination may overlap, hence memmove
    memmove(pQuery->sdata[col]->data, (char*)pQuery->sdata[col]->data + bytes * numOfSkip, (size_t)(pQuery->rec.rows * bytes));
    pRuntimeEnv->pCtx[col].aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update pQuery->status. QUERY_NOT_COMPLETED replaces the whole status; any other value is
 * OR-ed into the status after the QUERY_NOT_COMPLETED flag has been cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Run the xNextStep callback of every output function except ts, and report whether any of
 * them is still not complete afterwards.
 *
 * For group-by-column and interval queries this is done once per result row, after the
 * output buffers have been pointed at that row; otherwise once on the function contexts.
 * Returns true when at least one result info has complete == false.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool toContinue = false;
  if (pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for each group result, call the finalize function for each column
    SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

    for (int32_t rowIdx = 0; rowIdx < pWindowResInfo->size; ++rowIdx) {
      SResultRow *pResult = getResultRow(pWindowResInfo, rowIdx);

      setResultOutputBuf(pRuntimeEnv, pResult);
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int16_t functId = pQuery->pExpr1[col].base.functionId;
        if (functId != TSDB_FUNC_TS) {
          aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
          SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

          if (!pResInfo->complete) {
            toContinue = true;
          }
        }
      }
    }

    return toContinue;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t functId = pQuery->pExpr1[col].base.functionId;
    if (functId != TSDB_FUNC_TS) {
      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);
      SResultRowCellInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);

      if (!pResInfo->complete) {
        toContinue = true;
      }
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3612
/*
 * Take a snapshot of the scan state: query status, current result row index, `start` as
 * lastKey, and a copy of the query time window.
 *
 * `start` must not lie beyond the lastKey of the current table in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  assert((start <= pTableQueryInfo->lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (start >= pTableQueryInfo->lastKey && !QUERY_IS_ASC_QUERY(pQuery)));

  // fields not assigned below stay zero-initialised
  SQueryStatusInfo info = {0};
  info.status      = pQuery->status;
  info.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  info.lastKey     = start;

  TIME_WINDOW_COPY(info.w, pQuery->window);
  return info;
}

3629 3630 3631 3632
/*
 * Switch the runtime environment to the reverse scan.
 *
 * Steps, in this order: save the ts buffer cursor in pStatus->cur (and flip/advance the
 * buffer if present), swap the query window ends and flip the query order, set the reverse
 * scan flag, build the query condition, reset the query status, flip the context orders,
 * adjust the `complete` flags, update the per-table ranges, release a previous secondary
 * query handle and open a new one.
 *
 * Does not return when the new handle can not be created: longjmp(pRuntimeEnv->env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);  // save the cursor
  if (pRuntimeEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order);
    bool ret = tsBufNextPos(pRuntimeEnv->pTsBuf);
    assert(ret);
  }

  // reverse order time range
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // after the swap the window must run in the new scan direction
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3669 3670
/*
 * Undo setEnvBeforeReverseScan(): flip the query and context orders back, restore the ts
 * buffer cursor saved in pStatus, set the master scan flag, and restore lastKey, query
 * status and time window (of both the current table and the query) from pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTsBuf, &pStatus->cur);
  if (pRuntimeEnv->pTsBuf != NULL) {
    pRuntimeEnv->pTsBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // update the pQuery->window.skey and pQuery->window.ekey to limit the scan scope of sliding query during reverse scan
  pTableQueryInfo->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // the table window is restored first, the query window is then copied from it
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

H
Haojun Liao 已提交
3691 3692 3693 3694 3695 3696 3697
/*
 * Set the lastKey of the first table of the first group to the start key of the query
 * condition window. The group info must hold exactly one table.
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray        *pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo *pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3698
/*
 * Scan the data blocks of the current table.
 *
 * A master scan is executed first and repeated (REPEAT_SCAN) for as long as
 * needScanDataBlocksAgain() reports true; every repetition uses a freshly created secondary
 * query handle. If needReverseScan() is true, one reverse scan is executed afterwards.
 *
 * The function leaves through longjmp(pRuntimeEnv->env, ...) when the secondary query handle
 * cannot be created or when the query has been killed.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       key handed to getQueryStatusInfo() to build the start snapshot
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the start position; it is consulted by the repeat and the reverse scan
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);
  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  if (pRuntimeEnv->hasTagResults && !pRuntimeEnv->groupbyColumn) {
    setTagVal(pRuntimeEnv, pCurrent->pTable, pQInfo->tsdb);
  }

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      // remember the status code produced by the master scan
      saved.status = pQuery->status;

      // lastKey did not move: only log it, nothing else to do
      if (pCurrent->lastKey == saved.lastKey) {
        qDebug("QInfo:%p no results generated in this scan", pQInfo);
      }
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // a repeat scan must not leak its status code, put the master scan status back
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    // drop the previous secondary handle before a new one is created
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);

    pRuntimeEnv->pSecQueryHandle =
        tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    // prepare the state for the repeat scan
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // give up when the query has been killed
    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3771
/*
 * Invoke the xFinalize callback of every output function.
 *
 * For group-by-column and interval queries the callbacks run once per closed result row of
 * pRuntimeEnv->windowResInfo, and the row count of that result row is set from
 * getNumOfResult(). For group-by-column queries all result rows are closed first.
 * For every other query the callbacks run exactly once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyColumn && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SResultRowInfo *pRowInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyColumn) {
    closeAllResultRows(pRowInfo);
  }

  for (int32_t row = 0; row < pRowInfo->size; ++row) {
    SResultRow *pRow = pRowInfo->pResult[row];

    // result rows that are still open are left alone
    if (isResultRowClosed(pRowInfo, row)) {
      setResultOutputBuf(pRuntimeEnv, pRow);

      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        aAggs[pQuery->pExpr1[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
      }

      /*
       * number of output rows of this result row; according to the original note it is
       * usually 1, except for the top and bottom query
       */
      pRow->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
    }
  }
}

/*
 * Return true if at least one output expression uses a function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    switch (pQuery->pExpr1[col].base.functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
        break;  // not counted, inspect the next column

      default:
        return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3819
/*
 * Initialize the STableQueryInfo located in the caller supplied buffer buf.
 *
 * The time window, lastKey (window start key), table handle and cursor vgroup index (-1) are
 * set. For interval and group-by-column queries the result row info is initialized as well;
 * other queries leave windowResInfo untouched.
 *
 * @return buf as STableQueryInfo pointer, or NULL if initResultRowInfo() fails
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable          = pTable;
  pInfo->win             = win;
  pInfo->lastKey         = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // only interval/groupby queries keep per-table result rows
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyColumn) {
    return pInfo;
  }

  int32_t initialSize = 128;
  if (initResultRowInfo(&pInfo->windowResInfo, initialSize, TSDB_DATA_TYPE_INT) != TSDB_CODE_SUCCESS) {
    return NULL;
  }

  return pInfo;
}

H
Haojun Liao 已提交
3843
/*
 * Destroy the tag variant and clean up the result row info of pTableQueryInfo.
 * A NULL pointer is accepted and ignored. The structure itself is not freed here.
 */
void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    tVariantDestroy(&pTableQueryInfo->tag);
    cleanupResultRowInfo(&pTableQueryInfo->windowResInfo);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3855
 * @param pDataBlockInfo
3856
 */
H
Haojun Liao 已提交
3857
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3858
  SQueryRuntimeEnv *pRuntimeEnv     = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3859
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
3860
  SResultRowInfo   *pWindowResInfo  = &pRuntimeEnv->windowResInfo;
H
Haojun Liao 已提交
3861

H
Haojun Liao 已提交
3862 3863
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3864

H
Haojun Liao 已提交
3865
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
H
Haojun Liao 已提交
3866 3867
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3868

H
Haojun Liao 已提交
3869 3870 3871
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3872

3873
  int64_t uid = 0;
H
Haojun Liao 已提交
3874
  SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
H
Haojun Liao 已提交
3875
      sizeof(groupIndex), true, uid);
3876
  assert (pResultRow != NULL);
3877

3878 3879 3880 3881
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
H
Haojun Liao 已提交
3882 3883
  if (pResultRow->pageId == -1) {
    if (addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3884 3885 3886 3887
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3888

H
Haojun Liao 已提交
3889 3890
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
H
Haojun Liao 已提交
3891
  setResultOutputBuf(pRuntimeEnv, pResultRow);
3892 3893 3894
  initCtxOutputBuf(pRuntimeEnv);
}

3895
/*
 * Point every function context at its slot of pResult.
 *
 * For each output column the output buffer is set to the position inside the result page of
 * pResult and resultInfo to the matching result cell. The top, bottom and diff functions
 * additionally get the output buffer of the first context as their timestamp output buffer.
 */
void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    switch (pQuery->pExpr1[col].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;

      default:
        break;
    }

    /*
     * the result cell holds the output information and the intermediate buffer;
     * not all queries require the intermediate buffer, such as COUNT
     */
    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);
  }
}

H
Haojun Liao 已提交
3918
/*
 * Point every function context at its slot of pResult and initialize contexts that are not
 * initialized yet.
 *
 * resultInfo is always refreshed. Contexts whose result cell is both initialized and complete
 * are not touched any further; for the others the output buffer is set, currentStage is reset
 * to 0, top/bottom/diff get the output buffer of the first context as timestamp output buffer,
 * and the init callback runs if the cell has not been initialized.
 */
void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    pCtx->resultInfo = getResultCell(pRuntimeEnv, pResult, col);

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (!finished) {
      pCtx->aOutputBuf   = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
      pCtx->currentStage = 0;

      int32_t funcId = pCtx->functionId;
      switch (funcId) {
        case TSDB_FUNC_TOP:
        case TSDB_FUNC_BOTTOM:
        case TSDB_FUNC_DIFF:
          pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
          break;

        default:
          break;
      }

      if (!pCtx->resultInfo->initialized) {
        aAggs[funcId].init(pCtx);
      }
    }
  }
}

3946
/*
 * Set the tag values of pTable and, if a ts buffer exists, position the ts buffer cursor for
 * the table described by pTableQueryInfo.
 *
 * When pTableQueryInfo->cur.vgroupIndex is -1 the tag of the first function context is copied
 * into pTableQueryInfo->tag, the start position for (pQInfo->vgId, tag) is looked up in the ts
 * buffer and the resulting cursor is stored in pTableQueryInfo->cur. Otherwise the ts buffer
 * cursor is set from pTableQueryInfo->cur.
 *
 * @param pQInfo           query info
 * @param pTable           table handle passed to setTagVal()
 * @param pTableQueryInfo  per-table query state holding the tag and the ts buffer cursor
 * @return 0 on success (also when there is no ts buffer); -1 if the ts buffer holds no valid
 *         element for the tag. The failure path used to "return false", i.e. 0, which callers
 *         could not tell apart from success.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTsBuf == NULL) {
    return 0;
  }

  tVariant *pTag = &pRuntimeEnv->pCtx[0].tag;

  // string tags are logged through pz, all other tags through i64
  bool isStrTag = (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR);

  if (pTableQueryInfo->cur.vgroupIndex == -1) {
    tVariantAssign(&pTableQueryInfo->tag, pTag);

    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, &pTableQueryInfo->tag);

    // failed to find data with the specified tag value and vnodeId
    if (!tsBufIsValidElem(&elem)) {
      if (isStrTag) {
        qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
      } else {
        qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64);
      }

      return -1;
    }

    // keep the cursor info of current meter
    pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
  } else {
    tsBufSetCursor(pRuntimeEnv->pTsBuf, &pTableQueryInfo->cur);
  }

  // both branches report the cursor position with the same message
  if (isStrTag) {
    qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  } else {
    qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex);
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
4002
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
4003 4004
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
4005
  STableQueryInfo *pTableQueryInfo = pQuery->current;
4006

4007 4008 4009
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
4010
    pTableQueryInfo->win.skey = key;
4011
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
4012

4013 4014 4015 4016 4017
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
4018

4019 4020 4021
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
4022
     * In ascending query, the key is the first qualified timestamp. However, in the descending order query, additional
4023 4024
     * operations involve.
     */
H
Haojun Liao 已提交
4025
    STimeWindow     w = TSWINDOW_INITIALIZER;
H
Haojun Liao 已提交
4026
    SResultRowInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
4027

H
Haojun Liao 已提交
4028 4029
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
4030
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
4031

4032 4033
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
4034
        assert(win.ekey == pQuery->window.ekey);
4035
      }
4036

4037
      pWindowResInfo->prevSKey = w.skey;
4038
    }
4039

4040
    pTableQueryInfo->queryRangeSet = 1;
4041
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
4042 4043 4044 4045
  }
}

/*
 * Return true if the status flags of at least one output function contain
 * TSDB_FUNCSTATE_NEED_TS.
 */
bool requireTimestamp(SQuery *pQuery) {
  bool needTs = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !needTs; ++col) {
    int32_t funcId = pQuery->pExpr1[col].base.functionId;
    needTs = ((aAggs[funcId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
  }

  return needTs;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * 1. if lastKey of the current table or the end key of the query window is located in this
 *    block, the timestamp column is needed to decide the precise position
 * 2. if any output function requires timestamps (see requireTimestamp()), the timestamp
 *    column is needed in any case
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

4069
/*
 * Copy rows of the result rows in pRows into the result buffer of the query (pQuery->sdata)
 * until either all result rows are consumed or the buffer capacity is reached.
 *
 * Progress is kept across calls: *index counts the result rows that are done, and
 * pQInfo->groupResInfo.rowId holds the number of rows already copied from the result row that
 * was only partially copied.
 *
 * @param pQInfo     query info
 * @param pRows      array of result rows
 * @param numOfRows  number of entries in pRows
 * @param index      in/out, number of result rows already handled
 * @param orderType  TSDB_ORDER_ASC walks pRows forward, any other value backward
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SResultRow **pRows, int32_t numOfRows, int32_t *index, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SGroupResInfo    *pGroupResInfo = &pQInfo->groupResInfo;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  bool    ascending = (orderType == TSDB_ORDER_ASC);
  int32_t step = ascending ? 1 : -1;
  int32_t first = ascending ? (*index) : numOfRows - (*index) - 1;

  int32_t copied = 0;

  for (int32_t i = first; (i < numOfRows) && (i >= 0); i += step) {
    SResultRow *pRow = pRows[i];

    // empty result row, it counts as done
    if (pRow->numOfRows == 0) {
      (*index) += 1;
      pGroupResInfo->rowId = 0;
      continue;
    }

    int32_t rowOffset = pGroupResInfo->rowId;
    int32_t toCopy = pRow->numOfRows - pGroupResInfo->rowId;

    /*
     * the remaining output space cannot take all rows of this result row: copy only a part
     * and remember how far we got; otherwise the result row is done
     */
    if (toCopy > pQuery->rec.capacity - copied) {
      toCopy = (int32_t) pQuery->rec.capacity - copied;
      pGroupResInfo->rowId += toCopy;
    } else {
      pGroupResInfo->rowId = 0;
      (*index) += 1;
    }

    tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pRow->pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pRow, pPage);
      char *dst = pQuery->sdata[col]->data + copied * bytes;
      memcpy(dst, src + rowOffset * bytes, bytes * toCopy);
    }

    copied += toCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
H
Haojun Liao 已提交
4143
void copyFromWindowResToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo) {
4144
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
4145

4146
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
4147
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo->pResult, pResultInfo->size, &pQInfo->groupIndex, orderType);
4148

4149 4150
  pQuery->rec.rows += numOfResult;
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
4151 4152
}

H
Haojun Liao 已提交
4153
/*
 * For queries that are not interval queries, raise the row count of every result row in
 * pRuntimeEnv->windowResInfo to the largest numOfRes among its result cells. The cells of the
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ functions are not considered.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // interval queries are left untouched
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SResultRowInfo *pRowInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t row = 0; row < pRowInfo->size; ++row) {
    SResultRow *pRow = pRowInfo->pResult[row];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      switch (pRuntimeEnv->pCtx[col].functionId) {
        case TSDB_FUNC_TS:
        case TSDB_FUNC_TAG:
        case TSDB_FUNC_TAGPRJ:
          break;

        default: {
          SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pRow, col);
          pRow->numOfRows = (uint16_t)(MAX(pRow->numOfRows, pCell->numOfRes));
          break;
        }
      }
    }
  }
}

H
Haojun Liao 已提交
4176
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
4177
    SArray *pDataBlock, __block_search_fn_t searchFn) {
4178
  SQuery *         pQuery = pRuntimeEnv->pQuery;
4179
  STableQueryInfo* pTableQueryInfo = pQuery->current;
4180

4181
  SResultRowInfo * pResultRowInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
4182
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
4183

H
Haojun Liao 已提交
4184
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->groupbyColumn) {
4185
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, pDataBlock);
4186
  } else {
4187
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResultRowInfo, searchFn, pDataBlock);
4188
  }
H
Haojun Liao 已提交
4189 4190

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4191
    updateResultRowIndex(pResultRowInfo, pTableQueryInfo, QUERY_IS_ASC_QUERY(pQuery), pRuntimeEnv->timeWindowInterpo);
H
Haojun Liao 已提交
4192
  }
4193 4194
}

4195
/*
 * Check whether there are results that have not been returned to the client yet.
 *
 * Returns false once the limit is reached (limit > 0 and rec.total >= limit). For fill queries
 * that are not point interpolation queries, true is returned if the fill info has more results
 * or, for a completed query, if filling the gap up to the end of the query window produces
 * rows. For other queries, true is returned if the query is completed, is a group-by-column or
 * interval query, and windowResInfo is not empty.
 */
bool hasNotReturnedResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  bool limitReached = (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit);
  if (limitReached) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery));
  if (!fillQuery) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // There are results not returned to client yet, so filling applied to the remain result is required firstly.
  if (taosFillHasMoreResults(pFillInfo)) {
    return true;
  }

  /*
   * While the code reaches here, no results remain now.
   * If the query is not completed yet, the gaps between two result blocks need to be handled
   * after the next data block is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be
   * filled. If the result set is the FIRST result block, the gap between the start time of the
   * query time window and the timestamp of the first result row in the actual result set will
   * fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = (int32_t)getNumOfResultsAfterFillGap(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfTotal > 0;
}

H
Haojun Liao 已提交
4235 4236 4237 4238
/*
 * Number of columns in the final result: numOfExpr2 if the query has a second expression
 * list (pExpr2), numOfOutput otherwise.
 */
static int16_t getNumOfFinalResCol(SQuery* pQuery) {
  if (pQuery->pExpr2 != NULL) {
    return pQuery->numOfExpr2;
  }

  return pQuery->numOfOutput;
}

4239
/*
 * Serialize the query result into the message buffer data.
 *
 * Layout written to data, in this order:
 *   1. for every final result column, bytes * numOfRows bytes copied from pQuery->sdata
 *      (column widths come from pExpr2 if present, from pExpr1 otherwise)
 *   2. the number of entries of pQInfo->arrTableIdInfo as int32 in network byte order
 *   3. one STableIdInfo per entry, with uid/key converted by htobe64 and tid by htonl
 *
 * Finally the query status is set to QUERY_OVER if the query is completed and, for a super
 * table query, IS_STASBLE_QUERY_OVER holds, or, for other queries, no results are left.
 *
 * NOTE(review): data must be large enough for all of the above; no size is passed in, so this
 * cannot be checked here.
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // column data
  bool    hasExpr2 = (pQuery->pExpr2 != NULL);
  int32_t numOfCols = hasExpr2 ? pQuery->numOfExpr2 : pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfCols; ++col) {
    int32_t bytes = hasExpr2 ? pQuery->pExpr2[col].bytes : pQuery->pExpr1[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  // number of table id entries
  int32_t numOfTables = (int32_t) taosHashGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)data = htonl(numOfTables);
  data += sizeof(int32_t);

  // table id entries
  int32_t total = 0;
  for (STableIdInfo *item = taosHashIterate(pQInfo->arrTableIdInfo, NULL); item != NULL;
       item = taosHashIterate(pQInfo->arrTableIdInfo, item)) {
    STableIdInfo *pDst = (STableIdInfo*)data;
    pDst->uid = htobe64(item->uid);
    pDst->tid = htonl(item->tid);
    pDst->key = htobe64(item->key);

    data += sizeof(STableIdInfo);
    total++;

    qDebug("QInfo:%p set subscribe info, tid:%d, uid:%"PRIu64", skey:%"PRId64, pQInfo, item->tid, item->uid, item->key);
  }

  qDebug("QInfo:%p set %d subscribe info", pQInfo, total);

  // Check if query is completed or not for stable query or normal table query respectively.
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  bool over = pQInfo->runtimeEnv.stableQuery ? IS_STASBLE_QUERY_OVER(pQInfo)
                                             : !hasNotReturnedResults(&pQInfo->runtimeEnv);
  if (over) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

H
Haojun Liao 已提交
4294
/*
 * Produce filled result rows and apply the remaining offset of the query to them.
 *
 * Each round fills pQuery->sdata through taosFillResultDataBlock(). With no offset left the
 * rows are returned as they are. If the offset is smaller than the number of produced rows,
 * the leading rows are dropped by moving the rest to the front of the pDst columns and the
 * offset becomes 0. Otherwise the whole round is discarded, the offset is reduced, and another
 * round runs as long as hasNotReturnedResults() is true.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         column buffers in which the offset is applied
 * @param numOfFilled  not used by this function
 * @return number of rows available after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = (int32_t)taosFillResultDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    /* the start position according to the offset value is reached, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      // drop the first "offset" rows of every column
      ret -= (int32_t)pQuery->limit.offset;
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        char *base = pDst[col]->data;
        memmove(base, base + pQuery->pExpr1[col].bytes * pQuery->limit.offset, ret * pQuery->pExpr1[col].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // the offset swallows all rows of this round
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;

    if (!hasNotReturnedResults(pRuntimeEnv)) {
      return 0;
    }
  }
}

4337
/*
 * Complete the cost summary of the query and write it to the debug log.
 *
 * The hash size is the memory size of the result row hash table plus that of the table query
 * info map, the first stage merge time is added to the elapsed time, and the result row pool
 * provides the window info size and the number of time windows.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  uint64_t hashMem = taosHashGetMemSize(pRuntimeEnv->pResultRowHashTable);
  hashMem += taosHashGetMemSize(pQInfo->tableqinfoGroupInfo.map);
  pCost->hashSize = hashMem;

  // add the merge time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  SResultRowPool *pPool = pRuntimeEnv->pool;
  pCost->winInfoSize = getResultRowPoolMemSize(pPool);
  pCost->numOfTimeWindows = getNumOfAllocatedResultRows(pPool);

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, first merge:%"PRId64" us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->firstStageMergeTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  qDebug("QInfo:%p :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, hashTable:%.2f Kb", pQInfo, pCost->winInfoSize/1024.0,
      pCost->numOfTimeWindows, pCost->tableInfoSize/1024.0, pCost->hashSize/1024.0);
}

4361 4362
/*
 * Consume the remaining offset of the query inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped: lastKey
 * moves one step past the block boundary in scan direction and the offset becomes 0.
 * Otherwise pQuery->pos is set to the first row behind the offset, lastKey is read from the
 * first column of the retrieved data block at that position (read as TSKEY values), the
 * offset becomes 0 and the query functions are applied to the block.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset covers exactly this block, nothing of it is used
  if (pQuery->limit.offset == pBlockInfo->rows) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCurrent->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pCurrent->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pFirstCol = taosArrayGet(pBlock, 0);
  TSKEY           *tsList = (TSKEY *) pFirstCol->pData;

  // the offset is used up, continue from the row at pQuery->pos
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
Y
yihaoDeng 已提交
4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438
/**
 * Release a block-distribution object together with the block info array and
 * the result buffer it owns. Passing NULL is a no-op.
 */
static void freeTableBlockDist(STableBlockDist *pTableBlockDist) {
  if (pTableBlockDist == NULL) {
    return;
  }

  taosArrayDestroy(pTableBlockDist->dataBlockInfos);
  free(pTableBlockDist->result);
  free(pTableBlockDist);
}
static int32_t getPercentileFromSortedArray(const SArray* pArray, float rate) {
  size_t len = taosArrayGetSize(pArray);
  if (len == 0) {
    return 0;
  }
  assert(rate >= 0 && rate <= 1.0);
  int idx = (int32_t)((len - 1) * rate);
  return ((SDataBlockInfo *)(taosArrayGet(pArray, idx)))->rows;
}
/**
 * Three-way comparator ordering SDataBlockInfo elements by ascending row count.
 *
 * @return 1 if left has more rows, -1 if it has fewer, 0 if both are equal
 */
static int32_t compareBlockInfo(const void *pLeft, const void *pRight) {
  int32_t lhs = ((SDataBlockInfo *)pLeft)->rows;
  int32_t rhs = ((SDataBlockInfo *)pRight)->rows;

  return (lhs > rhs) - (lhs < rhs);
}

/**
 * Render the block distribution statistics of a table into
 * pTableBlockDist->result as human readable text.
 *
 * The report contains the 5/25/50/75/95/99th percentiles of rows per block,
 * min/max/avg rows per block, total rows and blocks, and the two counters
 * firstSeekTimeUs and numOfRowsInMemTable taken from pTableBlockDist.
 *
 * Side effect: dataBlockInfos is sorted by row count.
 *
 * When there is no block at all, min and max are reported as 0 instead of the
 * INT64_MAX / INT64_MIN sentinels used to seed the scan.
 *
 * NOTE(review): the text is written with sprintf(); the capacity of the result
 * buffer is not visible here, so the allocation site must guarantee it is big
 * enough for the formatted report.
 *
 * @param pTableBlockDist distribution object to summarise; NULL is ignored
 */
static void generateBlockDistResult(STableBlockDist *pTableBlockDist) {
  if (pTableBlockDist == NULL) {
    return;
  }

  SArray *blockInfos = pTableBlockDist->dataBlockInfos;
  size_t  numOfBlocks = taosArrayGetSize(blockInfos);

  int64_t totalBlocks = numOfBlocks;
  int64_t totalRows = 0;
  int64_t min = 0, max = 0, avg = 0;

  // only seed with the sentinels when there is at least one block to replace them
  if (numOfBlocks > 0) {
    min = INT64_MAX;
    max = INT64_MIN;
  }

  for (size_t i = 0; i < numOfBlocks; i++) {
    SDataBlockInfo *blockInfo = taosArrayGet(blockInfos, i);
    int64_t         rows = blockInfo->rows;

    min = MIN(min, rows);
    max = MAX(max, rows);
    totalRows += rows;
  }

  avg = totalBlocks > 0 ? (int32_t)(((totalRows * 1.0)/totalBlocks)) : 0;

  // percentiles are read by index, so the blocks must be ordered by row count first
  taosArraySort(blockInfos, compareBlockInfo);

  sprintf(pTableBlockDist->result,
          "summery: \n\t 5th=[%d], 25th=[%d], 50th=[%d],75th=[%d], 95th=[%d], 99th=[%d] \n\t min=[%"PRId64"], max=[%"PRId64"], avg = [%"PRId64"] \n\t totalRows=[%"PRId64"], totalBlocks=[%"PRId64"] \n\t seekHeaderTimeCost=[%"PRId64"(us)] \n\t rowsInMem=[%"PRId64"]",
          getPercentileFromSortedArray(blockInfos, 0.05), getPercentileFromSortedArray(blockInfos, 0.25), getPercentileFromSortedArray(blockInfos, 0.50),
          getPercentileFromSortedArray(blockInfos, 0.75), getPercentileFromSortedArray(blockInfos, 0.95), getPercentileFromSortedArray(blockInfos, 0.99),
          min, max, avg,
          totalRows, totalBlocks,
          pTableBlockDist->firstSeekTimeUs,
          pTableBlockDist->numOfRowsInMemTable);
}
4447 4448 4449 4450 4451
/**
 * Skip whole data blocks until the remaining LIMIT offset falls inside a block.
 *
 * Nothing is done when there is no positive offset or when filter columns
 * exist. Every block that has fewer rows than the remaining offset is dropped:
 * the offset is reduced by the row count of the block and lastKey of the
 * current table is moved one step past the block. The first block that cannot
 * be dropped completely is handed to updateOffsetVal().
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when the query has been killed
 * or when terrno is set after the scan.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT pHandle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pHandle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // the offset ends inside (or exactly at the end of) this block
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is skipped
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY boundary = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey = boundary + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4485

H
Haojun Liao 已提交
4486 4487
/**
 * Finish skipping time windows once the LIMIT offset has reached zero.
 *
 * The window that follows *win is computed. If it still overlaps the current
 * data block, the block is loaded, the next qualified window inside the block
 * is located, the query start timestamp is reset to the first row of that
 * window and the query functions are applied on the block (the current window
 * index is restored afterwards). Otherwise only the bookkeeping keys are moved
 * to the start of the next window.
 *
 * @param pRuntimeEnv     runtime environment of the running query
 * @param win             last time window that has been skipped
 * @param pBlockInfo      description of the current data block
 * @param pTableQueryInfo query info of the table being scanned
 * @return the new query start timestamp
 */
static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  assert(pQuery->limit.offset == 0);

  STimeWindow tw = *win;
  getNextTimeWindow(pQuery, &tw);

  bool nextWindowInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (tw.skey <= pBlockInfo->window.ekey)
                                                      : (tw.ekey >= pBlockInfo->window.skey);
  if (!nextWindowInBlock) {
    // the next window belongs to a following block: only move the keys forward
    pQuery->window.skey      = tw.skey;
    pWindowResInfo->prevSKey = tw.skey;
    pTableQueryInfo->lastKey = tw.skey;

    return tw.skey;
  }

  // load the data block and check data remaining in current data block
  // TODO optimize performance
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

  tw = *win;
  int32_t startPos =
      getNextQualifiedWindow(pRuntimeEnv, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
  assert(startPos >= 0);

  // set the abort info
  pQuery->pos = startPos;

  // reset the query start timestamp
  TSKEY key = ((TSKEY *)pColInfoData->pData)[startPos];
  pTableQueryInfo->win.skey = key;
  pQuery->window.skey = key;

  pWindowResInfo->prevSKey = tw.skey;

  // applying the functions may move the window index; keep it where it was
  int32_t index = pWindowResInfo->curIndex;
  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
  pWindowResInfo->curIndex = index;

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
         GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
         pQuery->current->lastKey);

  return key;
}

H
Haojun Liao 已提交
4537
/**
 * Apply the LIMIT offset of an interval query by skipping time windows.
 *
 * For interval queries without interpolation the offset counts time windows,
 * not rows. Since windows without data do not produce results,
 * interval * offset cannot be used to compute the start position; instead the
 * data blocks are walked and the offset is decreased for each window passed.
 * When the offset reaches zero, *start receives the new query start timestamp
 * computed by doSkipIntervalProcess().
 *
 * Nothing is skipped when there is no positive offset, when filter columns
 * exist, or when a ts buffer or fill info is attached to the runtime env.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) if terrno is set after the
 * scan.
 *
 * @param pRuntimeEnv runtime environment of the running query
 * @param start       in: current start key, out: start key after skipping
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    ascQuery = QUERY_IS_ASC_QUERY(pQuery);

  if (ascQuery) {
    assert(*start <= pQuery->current->lastKey);
  } else {
    assert(*start >= pQuery->current->lastKey);
  }

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow      w = TSWINDOW_INITIALIZER;
  SResultRowInfo  *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    // align the first window with the current block; for ascending scans this is
    // only done when no previous window start is being carried over
    if (!ascQuery) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);
      pWindowResInfo->prevSKey = w.skey;
    } else if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
      pWindowResInfo->prevSKey = w.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      STimeWindow tw = win;

      bool closedInBlock = ascQuery ? (win.ekey <= blockInfo.window.ekey) : (win.ekey >= blockInfo.window.skey);
      if (closedInBlock) {
        pQuery->limit.offset -= 1;

        // current time window is aligned with the block boundary:
        // restart it from next data block by set prevSKey to be TSKEY_INITIAL_VAL
        bool alignedWithBlock = ascQuery ? (win.ekey == blockInfo.window.ekey) : (win.ekey == blockInfo.window.skey);
        pWindowResInfo->prevSKey = alignedWithBlock ? TSKEY_INITIAL_VAL : win.skey;
      }

      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
        return true;
      }

      // current window does not ended in current data block, try next data block
      getNextTimeWindow(pQuery, &tw);

      bool nextStartsInBlock = ascQuery ? (tw.skey <= blockInfo.window.ekey) : (tw.ekey >= blockInfo.window.skey);
      if (!nextStartsInBlock) {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still starts from current data block: load the block
       * and find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

      bool crossesBlock = ascQuery ? (win.ekey > blockInfo.window.ekey) : (win.ekey < blockInfo.window.skey);
      if (crossesBlock) {
        pQuery->limit.offset -= 1;
      }

      if (pQuery->limit.offset == 0) {
        *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
        return true;
      }

      tw = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
      pWindowResInfo->prevSKey = tw.skey;
      win = tw;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

H
Haojun Liao 已提交
4647 4648
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo);

B
Bomin Zhang 已提交
4649
/**
 * Create the tsdb query handle used by the query and store it in the runtime
 * environment.
 *
 * No handle is created for tag-only queries, nor for super table queries that
 * are neither interval queries nor fixed-output queries.
 *
 * Depending on the query one of tsdbQueryLastRow(),
 * tsdbQueryRowsInExternalWindow() or tsdbQueryTables() is used. For a
 * first/last-row query the query window is replaced by the window reported
 * back in the condition, and that window is propagated to every table of every
 * group.
 *
 * @param tsdb          tsdb instance to query
 * @param pQInfo        query info object
 * @param isSTableQuery true for a super table query
 * @return TSDB_CODE_SUCCESS when no handle is needed, otherwise the value of
 *         terrno after the handle has been created
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // an ascending scan over a single normal table starts from the window of that table
  bool useTableWindow = (!isSTableQuery)
                     && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
                     && (cond.order == TSDB_ORDER_ASC)
                     && (!QUERY_IS_INTERVAL_QUERY(pQuery))
                     && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
                     && (!isFixedOutputQuery(pRuntimeEnv));
  if (useTableWindow) {
    SArray          *pFirstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pFirstTable = taosArrayGetP(pFirstGroup, 0);
    cond.twindow = pFirstTable->win;
  }

  terrno = TSDB_CODE_SUCCESS;

  if (isPointInterpoQuery(pQuery) && !isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    return terrno;
  }

  if (!isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
    return terrno;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

  // update the query time window
  pQuery->window = cond.twindow;

  if (pQInfo->tableGroupInfo.numOfTables == 0) {
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return terrno;
  }

  // every table restarts from the updated window
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t i = 0; i < numOfGroups; ++i) {
    SArray *group = GET_TABLEGROUP(pQInfo, i);
    size_t  numOfTables = taosArrayGetSize(group);

    for (int32_t j = 0; j < numOfTables; ++j) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(group, j);

      pCheckInfo->win = pQuery->window;
      pCheckInfo->lastKey = pCheckInfo->win.skey;
    }
  }

  return terrno;
}

H
Haojun Liao 已提交
4706
/**
 * Build the per-column fill description for the final result columns of a query.
 *
 * One SFillColInfo is produced for each final result column, taken from
 * pQuery->pExpr2 when it is set and from pQuery->pExpr1 otherwise. Each entry
 * records type, size and byte offset of the column (offsets accumulate the
 * sizes of the preceding columns), the function id and the fill value taken
 * from pQuery->fillVal.
 *
 * @param pQuery query whose result columns are described
 * @return array allocated with calloc(), owned by the caller; NULL if the
 *         allocation fails
 */
static SFillColInfo* createFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = getNumOfFinalResCol(pQuery);

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  // TODO refactor
  SExprInfo* pExprs = (pQuery->pExpr2 == NULL)? pQuery->pExpr1:pQuery->pExpr2;
  int32_t    offset = 0;

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo*    pExprInfo = &pExprs[i];
    SFillColInfo* pCol = &pFillCol[i];

    pCol->col.bytes  = pExprInfo->bytes;
    pCol->col.type   = (int8_t)pExprInfo->type;
    pCol->col.offset = offset;
    pCol->tagIndex   = -2;
    pCol->flag       = TSDB_COL_NORMAL;    // always treated as a normal column for table query
    pCol->functionId = pExprInfo->base.functionId;
    pCol->fillVal.i  = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4733
/**
 * Initialise the runtime environment of a query before it is executed.
 *
 * Steps, in order:
 *  1. derive the query flags (top/bottom, tag output, window interpolation)
 *     and the scan limitation;
 *  2. create the tsdb query handle;
 *  3. store tsdb, vgId, ts buffer and group-by information;
 *  4. create the disk based result buffer and the result row info, whose key
 *     type depends on the kind of query;
 *  5. set up the query runtime environment;
 *  6. create the fill info when a fill type is set and the query is not a
 *     point interpolation query.
 *
 * @param pQInfo        query info object to initialise
 * @param pTsBuf        ts buffer attached to the query, may be NULL
 * @param tsdb          tsdb instance to query
 * @param vgId          id of the vgroup
 * @param isSTableQuery true for a super table query
 * @return TSDB_CODE_SUCCESS, or the error code of the first step that failed
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);
  pRuntimeEnv->timeWindowInterpo = timeWindowInterpoRequired(pQuery);

  setScanLimitationByResultBuffer(pQuery);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTsBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyColumn = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // the ts buffer is traversed forward when it is stored in the order of the query
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTsBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTsBuf, order);
  }

  int32_t pageSize = DEFAULT_PAGE_SIZE;
  int32_t rowSize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowSize);

  int32_t bufLimit = 1024*1024*10;  // 10 MB

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowSize, pageSize, bufLimit, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // keyed by the group-by column (columns, not tags), or by the group id otherwise
      int16_t type = pRuntimeEnv->groupbyColumn ? getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr)
                                                : TSDB_DATA_TYPE_INT;

      code = initResultRowInfo(&pRuntimeEnv->windowResInfo, 8, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQuery) || (!isSTableQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &pageSize, &rowSize);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowSize, pageSize, bufLimit, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = pRuntimeEnv->groupbyColumn ? getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr)
                                              : TSDB_DATA_TYPE_TIMESTAMP;

    code = initResultRowInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = createFillColInfo(pQuery);

    // the fill starts from the aligned window that covers the query start key
    STimeWindow w = TSWINDOW_INITIALIZER;
    TSKEY       lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY       upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &w);

    int32_t numOfCols = getNumOfFinalResCol(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, numOfCols,
                                              pQuery->interval.sliding, pQuery->interval.slidingUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo, pQInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4833
/**
 * Clear the "complete" flag of the result info of every output column, so the
 * functions are executed again for the next table.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SResultRowCellInfo *pCell = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pCell == NULL) {
      continue;
    }

    pCell->complete = false;
  }
}

H
Haojun Liao 已提交
4844
// TODO refactor: setAdditionalInfo
H
Haojun Liao 已提交
4845 4846 4847 4848 4849
/**
 * Prepare the execution environment for the table that owns a data block.
 *
 * Interval query: the interval query range is set from the start key of the
 * block, and the additional info is set when tag results are output or a ts
 * buffer is attached.
 * Non-interval query: the execution context of the group of the table is set
 * with the key that lies one step past the end key of the block.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {  // non-interval query
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
    return;
  }

  setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
}

H
Haojun Liao 已提交
4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875
/**
 * Debug-only sanity check of the time window of a table against the query.
 *
 * Ascending query:  win.skey <= win.ekey, lastKey is not before win.skey, and
 *                   the table window lies inside the query window.
 * Descending query: the same relations with every comparison reversed.
 */
static void doTableQueryInfoTimeWindowCheck(SQuery* pQuery, STableQueryInfo* pTableQueryInfo) {
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pTableQueryInfo->win.skey >= pTableQueryInfo->win.ekey);
    assert(pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey);
    assert(pTableQueryInfo->win.skey <= pQuery->window.skey && pTableQueryInfo->win.ekey >= pQuery->window.ekey);
    return;
  }

  assert(pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey);
  assert(pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey);
  assert(pTableQueryInfo->win.skey >= pQuery->window.skey && pTableQueryInfo->win.ekey <= pQuery->window.ekey);
}

H
Haojun Liao 已提交
4876
/**
 * Scan every data block delivered by the query handle of the current scan
 * (the primary handle for the master scan, the secondary one otherwise) and
 * apply the query functions of the owning table on it.
 *
 * The scan stops early when a block belongs to a table that is not found in
 * the table map or when loading a block fails. Blocks reported as
 * BLK_DATA_DISCARD only move lastKey of the table past the block.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when the query has been killed
 * or when terrno is set after the scan.
 *
 * @return elapsed time of the scan in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *summary = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;

    if (isQueryKilled(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // find the query info of the table that owns this block
    STableQueryInfo **ppOwner = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppOwner == NULL) {
      break;
    }

    pQuery->current = *ppOwner;
    doTableQueryInfoTimeWindowCheck(pQuery, *ppOwner);

    if (!pRuntimeEnv->groupbyColumn) {
      setEnvForEachBlock(pQInfo, *ppOwner, &blockInfo);
    }

    uint32_t     status = 0;
    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo, &pStatis, &pDataBlock, &status);
    if (ret != TSDB_CODE_SUCCESS) {
      break;
    }

    if (status == BLK_DATA_DISCARD) {
      // nothing to compute on this block, continue right after it
      TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = boundary + step;
      continue;
    }

    summary->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block completed, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, "
           "lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows,
           pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4941 4942
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4943
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4944

4945
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
H
Haojun Liao 已提交
4946
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
4947
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);
4948

H
Haojun Liao 已提交
4949
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTsBuf != NULL) {
H
Haojun Liao 已提交
4950 4951
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }
4952

H
Haojun Liao 已提交
4953
  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
4954
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
H
Haojun Liao 已提交
4955
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);
4956

4957
  STsdbQueryCond cond = {
4958
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
H
hjxilinx 已提交
4959 4960
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
4961
      .numOfCols = pQuery->numOfCols,
4962
  };
4963

H
hjxilinx 已提交
4964
  // todo refactor
4965
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
4966 4967 4968 4969
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);
4970

4971
  taosArrayPush(g1, &tx);
4972
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
4973

4974
  // include only current table
4975 4976 4977 4978
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }
4979

H
Haojun Liao 已提交
4980
  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
4981 4982
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
B
Bomin Zhang 已提交
4983 4984 4985
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
4986

H
Haojun Liao 已提交
4987
  if (pRuntimeEnv->pTsBuf != NULL) {
H
Haojun Liao 已提交
4988 4989
      tVariant* pTag = &pRuntimeEnv->pCtx[0].tag;

4990
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
H
Haojun Liao 已提交
4991
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
4992
      // failed to find data with the specified tag value and vnodeId
4993
      if (!tsBufIsValidElem(&elem)) {
H
Haojun Liao 已提交
4994 4995 4996
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
        } else {
4997
          qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64);
H
Haojun Liao 已提交
4998 4999
        }

5000
        return false;
H
Haojun Liao 已提交
5001
      } else {
H
Haojun Liao 已提交
5002
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
5003 5004 5005 5006 5007

        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz,
                 cur.blockIndex, cur.tsIndex);
        } else {
5008
          qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64,
H
Haojun Liao 已提交
5009 5010
                 cur.blockIndex, cur.tsIndex);
        }
5011 5012
      }
    } else {
H
Haojun Liao 已提交
5013
      STSElem elem = tsBufGetElem(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
5014
      if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) {
H
Haojun Liao 已提交
5015

H
Haojun Liao 已提交
5016
        STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTsBuf, pQInfo->vgId, pTag);
H
Haojun Liao 已提交
5017
        // failed to find data with the specified tag value and vnodeId
5018
        if (!tsBufIsValidElem(&elem1)) {
H
Haojun Liao 已提交
5019 5020 5021
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz);
          } else {
5022
            qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64);
H
Haojun Liao 已提交
5023
          }
H
Haojun Liao 已提交
5024

H
Haojun Liao 已提交
5025
          return false;
H
Haojun Liao 已提交
5026
        } else {
H
Haojun Liao 已提交
5027
          STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
5028 5029 5030
          if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
            qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
          } else {
5031
            qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64, cur.blockIndex, cur.tsIndex);
H
Haojun Liao 已提交
5032
          }
H
Haojun Liao 已提交
5033
        }
H
Haojun Liao 已提交
5034

H
Haojun Liao 已提交
5035
      } else {
H
Haojun Liao 已提交
5036 5037
        tsBufSetCursor(pRuntimeEnv->pTsBuf, &pRuntimeEnv->cur);
        STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTsBuf);
H
Haojun Liao 已提交
5038 5039 5040
        if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) {
          qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex);
        } else {
5041
          qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64, cur.blockIndex, cur.tsIndex);
H
Haojun Liao 已提交
5042
        }
H
Haojun Liao 已提交
5043
      }
5044 5045
    }
  }
5046

5047
  initCtxOutputBuf(pRuntimeEnv);
5048 5049 5050
  return true;
}

H
Haojun Liao 已提交
5051
/**
 * Build a tsdb query condition from a query and a time window.
 *
 * Scan order, column list and number of columns are taken from pQuery; the
 * time window is copied from *win.
 */
STsdbQueryCond createTsdbQueryCond(SQuery* pQuery, STimeWindow* win) {
  STsdbQueryCond queryCond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
  };

  TIME_WINDOW_COPY(queryCond.twindow, *win);

  return queryCond;
}

/**
 * Build the id info (uid, tid and last scanned key) of the table the query is
 * currently working on.
 *
 * The structure is zero-initialised first, so that no member is left
 * indeterminate when the whole object is copied by a caller (for example into
 * a hash table).
 *
 * @param pQuery query with a non-NULL current table
 * @return id info of pQuery->current
 */
static STableIdInfo createTableIdInfo(SQuery* pQuery) {
  assert(pQuery != NULL && pQuery->current != NULL);

  STableIdInfo tidInfo = {0};
  STableId* id = TSDB_TABLEID(pQuery->current->pTable);

  tidInfo.uid = id->uid;
  tidInfo.tid = id->tid;
  tidInfo.key = pQuery->current->lastKey;

  return tidInfo;
}

/**
 * Record the last key of the current table in the tid-keyed hash.
 *
 * If an entry for the table id already exists only its key is refreshed
 * (uid/tid are asserted to match); otherwise a new entry is inserted.
 *
 * @param pQuery        query whose current table is recorded
 * @param pTableIdInfo  hash keyed by tid holding STableIdInfo values
 */
static void updateTableIdInfo(SQuery* pQuery, SHashObj* pTableIdInfo) {
  STableIdInfo  latest = createTableIdInfo(pQuery);
  STableIdInfo* pExisting = taosHashGet(pTableIdInfo, &latest.tid, sizeof(latest.tid));

  if (pExisting == NULL) {
    taosHashPut(pTableIdInfo, &latest.tid, sizeof(latest.tid), &latest, sizeof(STableIdInfo));
    return;
  }

  assert(pExisting->tid == latest.tid && pExisting->uid == latest.uid);
  pExisting->key = latest.key;
}

5086 5087 5088 5089 5090 5091 5092
/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
5093
static void sequentialTableProcess(SQInfo *pQInfo) {
5094
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5095
  SQuery *          pQuery = pRuntimeEnv->pQuery;
5096
  setQueryStatus(pQuery, QUERY_COMPLETED);
5097

H
Haojun Liao 已提交
5098
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
5099

5100
  if (isPointInterpoQuery(pQuery)) {
H
Haojun Liao 已提交
5101
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
5102
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
5103

5104
    while (pQInfo->groupIndex < numOfGroups) {
5105
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
H
Haojun Liao 已提交
5106

5107 5108
      qDebug("QInfo:%p point interpolation query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo,
             pQInfo->groupIndex, numOfGroups, group);
H
Haojun Liao 已提交
5109
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5110

H
Haojun Liao 已提交
5111 5112 5113
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
5114

H
Haojun Liao 已提交
5115 5116 5117 5118 5119 5120 5121
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
5122

H
Haojun Liao 已提交
5123
      pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
B
Bomin Zhang 已提交
5124 5125 5126 5127 5128 5129

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
5130

H
Haojun Liao 已提交
5131
      initCtxOutputBuf(pRuntimeEnv);
5132

5133
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5134
      assert(taosArrayGetSize(s) >= 1);
5135

5136
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
dengyihao's avatar
dengyihao 已提交
5137
      taosArrayDestroy(s);
H
Haojun Liao 已提交
5138

H
Haojun Liao 已提交
5139
      // here we simply set the first table as current table
5140
      SArray *first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
5141 5142
      pQuery->current = taosArrayGetP(first, 0);

5143
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5144

H
Haojun Liao 已提交
5145 5146 5147 5148 5149
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
5150

H
Haojun Liao 已提交
5151 5152 5153 5154 5155
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5156 5157 5158 5159 5160 5161

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
5162
  } else if (pRuntimeEnv->groupbyColumn) {  // group-by on normal columns query
5163
    while (pQInfo->groupIndex < numOfGroups) {
5164
      SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
5165

5166 5167
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex,
             numOfGroups);
5168

H
Haojun Liao 已提交
5169
      STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);
S
TD-1057  
Shengliang Guan 已提交
5170

5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
5183
      // no need to update the lastkey for each table
H
Haojun Liao 已提交
5184
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo, &pQInfo->memRef);
H
Haojun Liao 已提交
5185

B
Bomin Zhang 已提交
5186 5187
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
5188 5189 5190
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
5191

5192
      SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
5193 5194
      assert(taosArrayGetSize(s) >= 1);

5195
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
5196 5197 5198 5199 5200

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

5201
      taosArrayDestroy(s);
5202

5203
      // no results generated for current group, continue to try the next group
5204
      SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
5205 5206 5207 5208 5209
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
5210
        pWindowResInfo->pResult[i]->closed = true;  // enable return all results for group by normal columns
5211

H
Haojun Liao 已提交
5212
        SResultRow *pResult = pWindowResInfo->pResult[i];
5213
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
5214
          SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, j);
5215
          pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes));
5216 5217 5218
        }
      }

5219
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
5220
             pQInfo->groupIndex);
5221 5222 5223 5224 5225 5226
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
5227
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
5228

5229
      pQInfo->groupIndex = currentGroupIndex;  // restore the group index
5230
      assert(pQuery->rec.rows == pWindowResInfo->size);
5231
      resetResultRowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5232
      break;
5233
    }
H
Haojun Liao 已提交
5234
  } else if (pRuntimeEnv->queryWindowIdentical && pRuntimeEnv->pTsBuf == NULL && !isTSCompQuery(pQuery)) {
5235 5236 5237 5238 5239 5240 5241 5242 5243 5244
    //super table projection query with identical query time range for all tables.
    SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
    resetDefaultResInfoOutputBuf(pRuntimeEnv);

    SArray *group = GET_TABLEGROUP(pQInfo, 0);
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));

    void *pQueryHandle = pRuntimeEnv->pQueryHandle;
    if (pQueryHandle == NULL) {
H
Haojun Liao 已提交
5245
      STsdbQueryCond con = createTsdbQueryCond(pQuery, &pQuery->window);
5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &con, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);
      pQueryHandle = pRuntimeEnv->pQueryHandle;
    }

    // skip blocks without load the actual data block from file if no filter condition present
    //    skipBlocks(&pQInfo->runtimeEnv);
    //    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    //      setQueryStatus(pQuery, QUERY_COMPLETED);
    //      return;
    //    }

H
Haojun Liao 已提交
5257 5258 5259 5260 5261 5262
    if (pQuery->prjInfo.vgroupLimit != -1) {
      assert(pQuery->limit.limit == -1 && pQuery->limit.offset == 0);
    } else if (pQuery->limit.limit != -1) {
      assert(pQuery->prjInfo.vgroupLimit == -1);
    }

5263
    bool hasMoreBlock = true;
H
Haojun Liao 已提交
5264
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
5265 5266 5267 5268
    SQueryCostInfo *summary = &pRuntimeEnv->summary;
    while ((hasMoreBlock = tsdbNextDataBlock(pQueryHandle)) == true) {
      summary->totalBlocks += 1;

5269
      if (isQueryKilled(pQInfo)) {
5270 5271 5272 5273 5274
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
      }

      tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
      STableQueryInfo **pTableQueryInfo =
H
Haojun Liao 已提交
5275
          (STableQueryInfo **) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
5276 5277 5278 5279 5280
      if (pTableQueryInfo == NULL) {
        break;
      }

      pQuery->current = *pTableQueryInfo;
H
Haojun Liao 已提交
5281
      doTableQueryInfoTimeWindowCheck(pQuery, *pTableQueryInfo);
5282 5283 5284 5285 5286

      if (pRuntimeEnv->hasTagResults) {
        setTagVal(pRuntimeEnv, pQuery->current->pTable, pQInfo->tsdb);
      }

H
Haojun Liao 已提交
5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->current->windowResInfo.size > pQuery->prjInfo.vgroupLimit) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }

      // it is a super table ordered projection query, check for the number of output for each vgroup
      if (pQuery->prjInfo.vgroupLimit > 0 && pQuery->rec.rows >= pQuery->prjInfo.vgroupLimit) {
        if (QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.skey >= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        } else if (!QUERY_IS_ASC_QUERY(pQuery) && blockInfo.window.ekey <= pQuery->prjInfo.ts) {
          pQuery->current->lastKey =
                  QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
          continue;
        }
      }

5306 5307
      uint32_t     status = 0;
      SDataStatis *pStatis = NULL;
5308
      SArray      *pDataBlock = NULL;
5309 5310 5311 5312 5313 5314 5315

      int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo,
                                          &pStatis, &pDataBlock, &status);
      if (ret != TSDB_CODE_SUCCESS) {
        break;
      }

H
Haojun Liao 已提交
5316 5317 5318 5319 5320
      if(status == BLK_DATA_DISCARD) {
        pQuery->current->lastKey =
                QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey + step : blockInfo.window.skey + step;
        continue;
      }
5321

H
Haojun Liao 已提交
5322
      ensureOutputBuffer(pRuntimeEnv, &blockInfo);
H
Haojun Liao 已提交
5323 5324
      int64_t prev = getNumOfResult(pRuntimeEnv);

5325 5326 5327 5328 5329 5330 5331 5332 5333 5334
      pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : blockInfo.rows - 1;
      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

      summary->totalRows += blockInfo.rows;
      qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes,
             pQuery->current->lastKey);

      pQuery->rec.rows = getNumOfResult(pRuntimeEnv);

H
Haojun Liao 已提交
5335
      int64_t inc = pQuery->rec.rows - prev;
H
Haojun Liao 已提交
5336
      pQuery->current->windowResInfo.size += (int32_t) inc;
H
Haojun Liao 已提交
5337

5338 5339 5340 5341 5342
      // the flag may be set by tableApplyFunctionsOnBlock, clear it here
      CLEAR_QUERY_STATUS(pQuery, QUERY_COMPLETED);

      updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);

H
Haojun Liao 已提交
5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358
      if (pQuery->prjInfo.vgroupLimit >= 0) {
        if (((pQuery->rec.rows + pQuery->rec.total) < pQuery->prjInfo.vgroupLimit) || ((pQuery->rec.rows + pQuery->rec.total) > pQuery->prjInfo.vgroupLimit && prev < pQuery->prjInfo.vgroupLimit)) {
          if (QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts < blockInfo.window.ekey) {
            pQuery->prjInfo.ts = blockInfo.window.ekey;
          } else if (!QUERY_IS_ASC_QUERY(pQuery) && pQuery->prjInfo.ts > blockInfo.window.skey) {
            pQuery->prjInfo.ts = blockInfo.window.skey;
          }
        }
      } else {
        // the limitation of output result is reached, set the query completed
        skipResults(pRuntimeEnv);
        if (limitResults(pRuntimeEnv)) {
          setQueryStatus(pQuery, QUERY_COMPLETED);
          SET_STABLE_QUERY_OVER(pQInfo);
          break;
        }
5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370
      }

      // while the output buffer is full or limit/offset is applied, query may be paused here
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL|QUERY_COMPLETED)) {
        break;
      }
    }

    if (!hasMoreBlock) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      SET_STABLE_QUERY_OVER(pQInfo);
    }
5371 5372
  } else {
    /*
5373 5374 5375
     * the following two cases handled here.
     * 1. ts-comp query, and 2. the super table projection query with different query time range for each table.
     * If the subgroup index is larger than 0, results generated by group by tbname,k is existed.
5376 5377
     * we need to return it to client in the first place.
     */
5378
    if (pQInfo->groupIndex > 0) {
5379
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5380
      pQuery->rec.total += pQuery->rec.rows;
5381

5382
      if (pQuery->rec.rows > 0) {
5383 5384 5385
        return;
      }
    }
5386

5387
    // all data have returned already
5388
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
5389 5390
      return;
    }
5391

H
Haojun Liao 已提交
5392
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
H
Haojun Liao 已提交
5393
    resetResultRowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
5394

H
Haojun Liao 已提交
5395
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
5396 5397
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
5398

5399
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
5400
      if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5401
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5402
      }
5403

5404
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
5405
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
5406
        pQInfo->tableIndex++;
5407 5408
        continue;
      }
5409

H
hjxilinx 已提交
5410
      // TODO handle the limit offset problem
5411
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
5412 5413
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
5414 5415 5416
          continue;
        }
      }
5417

5418
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
5419
      skipResults(pRuntimeEnv);
5420

5421
      // the limitation of output result is reached, set the query completed
5422
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
5423
        SET_STABLE_QUERY_OVER(pQInfo);
5424 5425
        break;
      }
5426

5427 5428
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
5429

5430
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5431 5432 5433 5434 5435 5436
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
5437
        pQInfo->tableIndex++;
5438
        updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5439

5440
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
5441
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
5442 5443
          break;
        }
5444

H
Haojun Liao 已提交
5445 5446
        if (pRuntimeEnv->pTsBuf != NULL) {
          pRuntimeEnv->cur = pRuntimeEnv->pTsBuf->cur;
H
Haojun Liao 已提交
5447 5448
        }

5449
      } else {
5450
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
5451 5452
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
5453 5454
          continue;
        } else {
5455 5456 5457
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
5458 5459 5460
        }
      }
    }
H
Haojun Liao 已提交
5461

5462
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
5463 5464
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
5465

5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479
    /*
     * 1. super table projection query, group-by on normal columns query, ts-comp query
     * 2. point interpolation query, last row query
     *
     * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
     * since the finalize stage will be done at the client side.
     *
     * projection query, point interpolation query do not need the finalizer.
     *
     * Only the ts-comp query requires the finalizer function to be executed here.
     */
    if (isTSCompQuery(pQuery)) {
      finalizeQueryResult(pRuntimeEnv);
    }
5480

H
Haojun Liao 已提交
5481 5482
    if (pRuntimeEnv->pTsBuf != NULL) {
      pRuntimeEnv->cur = pRuntimeEnv->pTsBuf->cur;
5483
    }
5484

5485 5486 5487 5488 5489
    qDebug("QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64
           " points returned, total:%" PRId64 ", offset:%" PRId64,
           pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows,
           pQuery->rec.total, pQuery->limit.offset);
  }
5490 5491
}

H
Haojun Liao 已提交
5492
/**
 * Switch the runtime environment into reverse-scan mode and open the secondary
 * query handle used for that scan.
 *
 * The scan flag is set to reverse, the query window endpoints are swapped, and the
 * order of the query (and of the ts buffer cursor, if a ts buffer exists) is flipped.
 * Any secondary handle left from an earlier call is released before the new one is opened.
 *
 * @param pQInfo  query instance
 * @return TSDB_CODE_SUCCESS when the secondary handle was opened; otherwise a non-zero
 *         code: terrno if it is set, or -1 as a fallback
 */
static int32_t doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pRuntimeEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order);
  }

  // the condition must be built after the window/order switch above
  STsdbQueryCond cond = createTsdbQueryCond(pQuery, &pQuery->window);

  // clean unused handle; reset the pointer so it never dangles while the context is prepared
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef);

  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    // report the reason recorded in terrno instead of an anonymous -1 when it is available
    return (terrno != TSDB_CODE_SUCCESS) ? terrno : -1;
  }

  return TSDB_CODE_SUCCESS;
}

5521 5522 5523 5524
/**
 * Leave reverse-scan mode: swap the query window endpoints back, flip the order of
 * the query (and of the ts buffer cursor when a ts buffer exists), switch the function
 * contexts' order again and mark the runtime environment as master scan.
 *
 * @param pQInfo  query instance whose runtime environment is restored
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;
  STimeWindow      *pWindow = &pQuery->window;

  // undo the window/order switch performed for the reverse scan
  SWAP(pWindow->skey, pWindow->ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pEnv->pTsBuf != NULL) {
    SWITCH_ORDER(pEnv->pTsBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

5536 5537 5538
/**
 * Close every result row once the scan is finished.
 *
 * For an interval query the result rows of each table in each table group are closed;
 * otherwise the result rows of the runtime environment itself are closed.
 *
 * @param pQInfo  query instance
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // close results for group result
    closeAllResultRows(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(tables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(tables, t);
      closeAllResultRows(&pTableQueryInfo->windowResInfo);
    }
  }
}

/* Abort the query through its jump buffer when an error code is recorded or the query was killed. */
static void abortMultiTableQueryOnFailure(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_SUCCESS && !isQueryKilled(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
  //TODO finalizeQueryResult may cause SEGSEV, since the memory may not allocated yet, add a cleanup function instead
  longjmp(pQInfo->runtimeEnv.env, TSDB_CODE_TSC_QUERY_CANCELLED);
}

/**
 * Run a query over multiple tables: master scan, optional reverse scan, then copy the
 * (merged) results into the output buffer.
 *
 * When pQInfo->groupIndex is already larger than 0 no scan is performed; the remaining
 * results are only copied to the output buffer.
 *
 * A recorded error code or a killed query aborts execution via longjmp on the
 * runtime environment's jump buffer.
 *
 * @param pQInfo  query instance
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  /*
   * if the groupIndex > 0, the query process must be completed yet, we only need to
   * copy the data into output buffer
   */
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // do check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  abortMultiTableQueryOnFailure(pQInfo);

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    int32_t saveCode = doSaveContext(pQInfo);
    if (saveCode != TSDB_CODE_SUCCESS) {
      pQInfo->code = saveCode;
    } else {
      elapsed = scanMultiTableDataBlocks(pQInfo);
      qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);
      doRestoreContext(pQInfo);
    }
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  abortMultiTableQueryOnFailure(pQInfo);

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery))) {
    // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  } else {
    int32_t mergeCode = mergeGroupResult(pQInfo);
    if (mergeCode != TSDB_CODE_SUCCESS) {
      // set the error code
      pQInfo->code = mergeCode;
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

H
Haojun Liao 已提交
5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646
static char *getArithemicInputSrc(void *param, const char *name, int32_t colId) {
  SArithmeticSupport *pSupport = (SArithmeticSupport *) param;
  SExprInfo* pExprInfo = (SExprInfo*) pSupport->exprList;

  int32_t index = -1;
  for (int32_t i = 0; i < pSupport->numOfCols; ++i) {
    if (colId == pExprInfo[i].base.resColId) {
      index = i;
      break;
    }
  }

  assert(index >= 0 && index < pSupport->numOfCols);
  return pSupport->data[index] + pSupport->offset * pExprInfo[index].bytes;
}

/**
 * Evaluate the second-stage expressions (pExpr2) on top of the first-stage output and
 * overwrite the leading output columns with their results.
 *
 * Each second-stage expression is computed into a temporary page: arithmetic expressions
 * are evaluated over the first-stage columns, any other expression is copied from the
 * first-stage column with the same function id and column id. Only after all expressions
 * are computed are the temporary pages copied back into pQuery->sdata.
 *
 * Nothing is done when the query has no second-stage expressions.
 *
 * NOTE(review): the results of calloc/malloc are used without a NULL check.
 *
 * @param pQuery  query whose sdata[0 .. numOfExpr2-1] are rewritten in place
 */
static void doSecondaryArithmeticProcess(SQuery* pQuery) {
  if (pQuery->numOfExpr2 == 0) {
    return;
  }

  // one temporary page per second-stage expression, sized for the current number of rows
  tFilePage **pages = calloc(pQuery->numOfExpr2, POINTER_BYTES);
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    int32_t width = pQuery->pExpr2[i].bytes;
    pages[i] = (tFilePage *)malloc((size_t)(width * pQuery->rec.rows) + sizeof(tFilePage));
  }

  // the arithmetic evaluation reads its operands from the first-stage output columns
  SArithmeticSupport support = {0};
  support.offset    = 0;
  support.numOfCols = (int32_t)pQuery->numOfOutput;
  support.exprList  = pQuery->pExpr1;
  support.data      = calloc(support.numOfCols, POINTER_BYTES);

  for (int32_t k = 0; k < support.numOfCols; ++k) {
    support.data[k] = pQuery->sdata[k]->data;
  }

  for (int i = 0; i < pQuery->numOfExpr2; ++i) {
    SExprInfo   *pExpr = &pQuery->pExpr2[i];
    SSqlFuncMsg *pFunc = &pExpr->base;

    if (pFunc->functionId == TSDB_FUNC_ARITHM) {
      // calculate the result from several other columns
      support.pArithExpr = pExpr;
      arithmeticTreeTraverse(support.pArithExpr->pExpr, (int32_t)pQuery->rec.rows, pages[i]->data, &support, TSDB_ORDER_ASC,
                            getArithemicInputSrc);
      continue;
    }

    // plain column: take it over from the first matching first-stage expression
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      SExprInfo *pSrc = &pQuery->pExpr1[j];
      if (pFunc->functionId != pSrc->base.functionId || pFunc->colInfo.colId != pSrc->base.colInfo.colId) {
        continue;
      }

      memcpy(pages[i]->data, pQuery->sdata[j]->data, (size_t)(pSrc->bytes * pQuery->rec.rows));
      break;
    }
  }

  // publish the results and release each temporary page as soon as it has been copied
  for (int32_t i = 0; i < pQuery->numOfExpr2; ++i) {
    memcpy(pQuery->sdata[i]->data, pages[i]->data, (size_t)(pQuery->pExpr2[i].bytes * pQuery->rec.rows));
    tfree(pages[i]);
  }

  tfree(pages);
  tfree(support.data);
}

5700 5701 5702 5703 5704 5705
/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
Haojun Liao 已提交
5706
static void tableAggregationProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5707
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
5708

H
hjxilinx 已提交
5709
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
5710
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
5711 5712
    return;
  }
5713

5714
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
5715
  finalizeQueryResult(pRuntimeEnv);
5716

H
Haojun Liao 已提交
5717 5718 5719 5720
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
  doSecondaryArithmeticProcess(pQuery);

5721
  if (isQueryKilled(pQInfo)) {
H
Haojun Liao 已提交
5722
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
5723
  }
5724

5725
  skipResults(pRuntimeEnv);
5726
  limitResults(pRuntimeEnv);
5727 5728
}

H
Haojun Liao 已提交
5729
/**
 * Projection query on a single table.
 *
 * The table referenced by pQuery->current is scanned repeatedly until at least one row
 * is produced or the query completes. Afterwards the limit is applied; when the query
 * has completed, the table's id and last key are stored in pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query instance
 * @param pTableInfo  not referenced by this function; the table is taken from pQuery->current
 */
static void tableProjectionProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetDefaultResInfoOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  bool finished = false;
  do {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);

    bool applyOffset = pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0;
    if (applyOffset) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    finished = pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED);
    if (!finished) {
      qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
             pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

      resetDefaultResInfoOutputBuf(pRuntimeEnv);
    }
  } while (!finished);

  limitResults(pRuntimeEnv);

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // remember how far this table has been read
    STableIdInfo tidInfo = createTableIdInfo(pQuery);
    taosHashPut(pQInfo->arrTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid), &tidInfo, sizeof(STableIdInfo));
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

5782
// handle time interval query on table
H
hjxilinx 已提交
5783
/*
 * Handle a time-interval (or group-by-column) query on a single table.
 *
 * The table is scanned once, the window results are finalized and then copied into the
 * output buffers, either directly or through the fill procedure when a fill type is set
 * and at least one window result exists.
 *
 * @param pQInfo      query handle that owns the runtime environment
 * @param pTableInfo  table being queried (not referenced by this function)
 */
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;

  // default scan start key; skipTimeInterval() receives its address and may adjust it
  TSKEY startKey;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = INT64_MIN;
  } else {
    startKey = INT64_MAX;
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  if (!pEnv->groupbyColumn) {
    skipTimeInterval(pEnv, &startKey);

    // offset not consumed yet, nothing to filter and nothing to fill: the query is over
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pEnv->pFillInfo == NULL) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      return;
    }
  }

  scanOneTableDataBlocks(pEnv, startKey);
  assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));

  finalizeQueryResult(pEnv);

  // the output row counter is rebuilt by the copy/fill steps below
  pQuery->rec.rows = 0;

  bool fillRequired = (pQuery->fillType != TSDB_FILL_NONE) && (pEnv->windowResInfo.size != 0);
  if (!fillRequired) {
    // no fill, or no result generated during this query: all data scanned, results can be returned
    int32_t closedRows = numOfClosedResultRows(&pEnv->windowResInfo);
    if (pQuery->limit.offset > closedRows) {
      return;
    }

    // the offset is applied by starting the copy at the corresponding window result
    pQInfo->groupIndex = (int32_t)pQuery->limit.offset;

    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    doSecondaryArithmeticProcess(pQuery);

    limitResults(pEnv);
    return;
  }

  // fill path: copy the window results, then feed them to the fill procedure
  copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  doSecondaryArithmeticProcess(pQuery);

  taosFillSetStartInfo(pEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
  taosFillCopyInputDataFromFilePage(pEnv->pFillInfo, (const tFilePage **)pQuery->sdata);

  int32_t numOfFilled = 0;
  pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

  if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    limitResults(pEnv);
  }
}

5837 5838 5839 5840
/*
 * Entry point for a query on exactly one table.
 *
 * When results from a previous round are still pending they are emitted first (through the
 * fill procedure or from the window results) and the function returns. Otherwise the query
 * is dispatched to the interval, aggregation or projection processor and the elapsed time
 * is added to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;

  if (!hasNotReturnedResults(pEnv)) {
    // number of points returned during this query
    pQuery->rec.rows = 0;
    int64_t startUs = taosGetTimestampUs();

    assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
    SArray* pGroup = GET_TABLEGROUP(pQInfo, 0);

    STableQueryInfo* pTable = taosArrayGetP(pGroup, 0);
    pQuery->current = pTable;

    // group by normal column, sliding window query, interval query are handled by interval query processor
    if (QUERY_IS_INTERVAL_QUERY(pQuery) || pEnv->groupbyColumn) {  // interval (down sampling operation)
      tableIntervalProcess(pQInfo, pTable);
    } else if (isFixedOutputQuery(pEnv)) {
      tableAggregationProcess(pQInfo, pTable);
    } else {  // diff/add/multiply/subtract/division
      assert(pQuery->checkResultBuf == 1);
      tableProjectionProcess(pQInfo, pTable);
    }

    // record the total elapsed time
    pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
    assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
    return;
  }

  if (pQuery->fillType == TSDB_FILL_NONE) {
    // remaining rows come straight from the window results
    pQuery->rec.rows = 0;
    assert(pEnv->windowResInfo.size > 0);

    if (pQInfo->groupIndex < pEnv->windowResInfo.size) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    }

    if (pQuery->rec.rows > 0) {
      qDebug("QInfo:%p %" PRId64 " rows returned from group results, total:%" PRId64 "", pQInfo, pQuery->rec.rows,
             pQuery->rec.total);
    }

    // there are no data remaining
    if (pQuery->rec.rows <= 0 || pEnv->windowResInfo.size <= pQInfo->groupIndex) {
      qDebug("QInfo:%p query over, %" PRId64 " rows are returned", pQInfo, pQuery->rec.total);
    }

    return;
  }

  /*
   * There are remaining results that were not returned due to result interpolation,
   * so keep producing them here instead of launching the retrieve procedure for the next results.
   */
  int32_t numOfFilled = 0;
  pQuery->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

  if (pQuery->rec.rows > 0) {
    limitResults(pEnv);
  }

  qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
}
Y
yihaoDeng 已提交
5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952
/*
 * Build the block-distribution result of a table as the single output row of the query.
 *
 * The info of every data block returned by the query handle is collected, the textual
 * result is produced by generateBlockDistResult() and copied (as a var-string) into the
 * output column. Errors are reported by longjmp() to pRuntimeEnv->env: the query being
 * killed, a non-success terrno after the scan, or a failed allocation of the working
 * buffers (previously such a failure led to a NULL dereference).
 */
static void buildTableBlockDistResult(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->pos = 0;

  STableBlockDist *pTableBlockDist = calloc(1, sizeof(STableBlockDist));
  if (pTableBlockDist == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  pTableBlockDist->dataBlockInfos = taosArrayInit(512, sizeof(SDataBlockInfo));
  pTableBlockDist->result         = malloc(512);
  if (pTableBlockDist->dataBlockInfos == NULL || pTableBlockDist->result == NULL) {
    // release whatever was obtained before bailing out
    taosArrayDestroy(pTableBlockDist->dataBlockInfos);
    free(pTableBlockDist->result);
    free(pTableBlockDist);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  SSchema blockDistSchema = tGetBlockDistColumnSchema();

  int64_t startTime = taosGetTimestampUs();
  while (tsdbNextDataBlockWithoutMerge(pQueryHandle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      freeTableBlockDist(pTableBlockDist);
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    // time elapsed until the first block became available
    if (pTableBlockDist->firstSeekTimeUs == 0) {
      pTableBlockDist->firstSeekTimeUs = taosGetTimestampUs() - startTime;
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    taosArrayPush(pTableBlockDist->dataBlockInfos, &blockInfo);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    freeTableBlockDist(pTableBlockDist);
    longjmp(pRuntimeEnv->env, terrno);
  }

  pTableBlockDist->numOfRowsInMemTable = tsdbGetNumOfRowsInMemTable(pQueryHandle);

  generateBlockDistResult(pTableBlockDist);

  int type = -1;
  assert(pQuery->numOfOutput == 1);
  SExprInfo* pExprInfo = pQuery->pExpr1;
  for (int32_t j = 0; j < pQuery->numOfOutput; j++) {
    if (pExprInfo[j].base.colInfo.colId == TSDB_BLOCK_DIST_COLUMN_INDEX) {
      type = blockDistSchema.type;
    }
    assert(type == TSDB_DATA_TYPE_BINARY);
    STR_TO_VARSTR(pQuery->sdata[j]->data, pTableBlockDist->result);
  }

  freeTableBlockDist(pTableBlockDist);

  pQuery->rec.rows = 1;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}
5953

5954
/*
 * Entry point for a query on a super table.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation queries
 * nor group-by-column queries, are processed by multiTableQueryProcess(); everything else
 * is processed by sequentialTableProcess(). The elapsed time is added to the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;
  pQuery->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool multiTableScan =
      QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && (!pEnv->groupbyColumn));

  if (!multiTableScan) {
    assert((pQuery->checkResultBuf == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) ||
           pEnv->groupbyColumn);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5975
/*
 * Locate the column referenced by an expression in the source column lists of the query message.
 *
 * @param pQueryMsg  query message holding colList/numOfCols and numOfTags
 * @param pExprMsg   expression whose colInfo (flag, colId) is looked up
 * @param pTagCols   tag column list, searched when the expression refers to a tag
 * @return the index within pTagCols (tag) or pQueryMsg->colList (normal column);
 *         TSDB_TBNAME_COLUMN_INDEX / TSDB_BLOCK_DIST_COLUMN_INDEX for those special tag columns;
 *         TSDB_UD_COLUMN_INDEX for a user-defined column; INT32_MIN when nothing matches
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    // the special pseudo columns are reported by their own (negative) index
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return TSDB_TBNAME_COLUMN_INDEX;
    }

    if (pExprMsg->colInfo.colId == TSDB_BLOCK_DIST_COLUMN_INDEX) {
      return TSDB_BLOCK_DIST_COLUMN_INDEX;
    }

    for (int32_t idx = 0; idx < pQueryMsg->numOfTags; ++idx) {
      if (pTagCols[idx].colId == pExprMsg->colInfo.colId) {
        return idx;
      }
    }

    return INT32_MIN;  // return a less than TSDB_TBNAME_COLUMN_INDEX value
  }

  if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  }

  for (int32_t idx = 0; idx < pQueryMsg->numOfCols; ++idx) {
    if (pQueryMsg->colList[idx].colId == pExprMsg->colInfo.colId) {
      return idx;
    }
  }

  return INT32_MIN;  // return a less than TSDB_TBNAME_COLUMN_INDEX value
}

6009 6010
/*
 * Check that the column referenced by an expression can be resolved in the query message.
 *
 * @return true unless getColumnIndexInSource() reports INT32_MIN (no matching column)
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  return getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols) != INT32_MIN;
}

6014
/*
 * Sanity check of the scalar fields of a query message (already in host byte order).
 *
 * The checks run in a fixed order and only the first violation is logged.
 *
 * @return true when interval >= 0, numOfTables > 0, numOfGroupCols >= 0 and
 *         0 < numOfOutput <= TSDB_MAX_COLUMNS; false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->interval.interval < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->interval.interval);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

6038
/*
 * Validate the source column information of a query message against its output expressions.
 *
 * @param pQueryMsg  query message (numOfCols, numOfTags, numOfOutput already in host byte order)
 * @param pExprMsg   array of numOfOutput output expressions
 * @param pTagCols   tag column list of the message
 * @return true when the message is acceptable, false otherwise
 *
 * The function returns bool, so a failure must be reported as false. It previously returned
 * TSDB_CODE_QRY_INVALID_MSG from the per-expression check, which converts to true and made
 * that check a no-op.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg, SColumnInfo* pTagCols) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;
  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  // without any source column, only a few tbname/tag based expressions are allowed
  if (numOfTotal == 0) {
    for(int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      SSqlFuncMsg* pFuncMsg = pExprMsg[i];

      if ((pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
          (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
          (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX)) {
        continue;
      }

      return false;
    }
  }

  for(int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    // arithmetic expressions are built from their binary argument by createQueryFuncExprFromMsg()
    // without a source column lookup, so they are not subject to the column check
    if (pExprMsg[i]->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    if (!validateExprColumnInfo(pQueryMsg, pExprMsg[i], pTagCols)) {
      return false;
    }
  }

  return true;
}

6068
/*
 * Decode the table id section of a query message.
 *
 * Each STableIdInfo entry at pMsg is converted in place (tid with htonl, uid/key with htobe64)
 * and a copy is pushed into a newly created array stored in *pTableIdList.
 *
 * @param pQueryMsg     query message; numOfTables gives the number of entries
 * @param pMsg          start of the table id entries inside the message buffer
 * @param pTableIdList  receives the created array
 * @return pointer to the first byte after the last table id entry
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  char *cursor = pMsg;
  for (int32_t idx = 0; idx < pQueryMsg->numOfTables; ++idx, cursor += sizeof(STableIdInfo)) {
    STableIdInfo* pEntry = (STableIdInfo *)cursor;

    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return cursor;
}
6086

6087
/**
H
hjxilinx 已提交
6088
 * pQueryMsg->head has been converted before this function is called.
6089
 *
H
hjxilinx 已提交
6090
 * @param pQueryMsg
6091 6092 6093 6094
 * @param pTableIdList
 * @param pExpr
 * @return
 */
H
Haojun Liao 已提交
6095
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr, SSqlFuncMsg ***pSecStageExpr,
6096
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols, char** sql) {
6097 6098
  int32_t code = TSDB_CODE_SUCCESS;

6099 6100 6101 6102
  if (taosCheckVersion(pQueryMsg->version, version, 3) != 0) {
    return TSDB_CODE_QRY_INVALID_MSG;
  }

6103 6104 6105 6106
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
6107 6108 6109
  pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval);
  pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding);
  pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset);
6110 6111
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
Haojun Liao 已提交
6112
  pQueryMsg->vgroupLimit = htobe64(pQueryMsg->vgroupLimit);
H
hjxilinx 已提交
6113

6114 6115
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
6116
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
6117
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
6118 6119

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
6120
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
6121
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
6122 6123 6124
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
6125
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
6126
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
6127
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6128
  pQueryMsg->tbnameCondLen = htonl(pQueryMsg->tbnameCondLen);
H
Haojun Liao 已提交
6129
  pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput);
6130
  pQueryMsg->sqlstrLen = htonl(pQueryMsg->sqlstrLen);
6131

6132
  // query msg safety check
6133
  if (!validateQueryMsg(pQueryMsg)) {
6134 6135
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
6136 6137
  }

H
hjxilinx 已提交
6138 6139
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
6140 6141
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
6142
    pColInfo->colId = htons(pColInfo->colId);
6143
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
6144 6145
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
6146

6147 6148 6149 6150 6151
    if (!isValidDataType(pColInfo->type)) {
      qDebug("qmsg:%p, invalid data type in source column, index:%d, type:%d", pQueryMsg, col, pColInfo->type);
      code = TSDB_CODE_QRY_INVALID_MSG;
      goto _cleanup;
    }
6152

H
hjxilinx 已提交
6153
    int32_t numOfFilters = pColInfo->numOfFilters;
6154
    if (numOfFilters > 0) {
H
hjxilinx 已提交
6155
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
H
Haojun Liao 已提交
6156 6157 6158 6159
      if (pColInfo->filters == NULL) {
        code = TSDB_CODE_QRY_OUT_OF_MEMORY;
        goto _cleanup;
      }
6160 6161 6162
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
6163
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
6164

6165 6166
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
6167 6168 6169

      pMsg += sizeof(SColumnFilterInfo);

6170 6171
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
6172

6173
        pColFilter->pz = (int64_t)calloc(1, (size_t)(pColFilter->len + 1 * TSDB_NCHAR_SIZE)); // note: null-terminator
H
Haojun Liao 已提交
6174 6175 6176 6177 6178
        if (pColFilter->pz == 0) {
          code = TSDB_CODE_QRY_OUT_OF_MEMORY;
          goto _cleanup;
        }

6179
        memcpy((void *)pColFilter->pz, pMsg, (size_t)pColFilter->len);
6180
        pMsg += (pColFilter->len + 1);
6181
      } else {
6182 6183
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
6184 6185
      }

6186 6187
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
6188 6189 6190
    }
  }

6191
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
6192 6193 6194 6195 6196
  if (*pExpr == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _cleanup;
  }

6197
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
6198

6199
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6200
    (*pExpr)[i] = pExprMsg;
6201

6202
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
6203
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
H
Haojun Liao 已提交
6204 6205 6206 6207
    pExprMsg->colInfo.flag  = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId    = htons(pExprMsg->functionId);
    pExprMsg->numOfParams   = htons(pExprMsg->numOfParams);
    pExprMsg->resColId      = htons(pExprMsg->resColId);
6208

6209
    pMsg += sizeof(SSqlFuncMsg);
6210 6211

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
6212
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
6213 6214 6215 6216
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
6217
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
6218 6219 6220 6221 6222
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
6223 6224
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
Y
TD-1230  
yihaoDeng 已提交
6225
      if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
6226 6227
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
6228 6229 6230
      }
    }

6231
    pExprMsg = (SSqlFuncMsg *)pMsg;
6232
  }
6233

H
Haojun Liao 已提交
6234 6235 6236
  if (pQueryMsg->secondStageOutput) {
    pExprMsg = (SSqlFuncMsg *)pMsg;
    *pSecStageExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES);
6237

H
Haojun Liao 已提交
6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272
    for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) {
      (*pSecStageExpr)[i] = pExprMsg;

      pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
      pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
      pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
      pExprMsg->functionId = htons(pExprMsg->functionId);
      pExprMsg->numOfParams = htons(pExprMsg->numOfParams);

      pMsg += sizeof(SSqlFuncMsg);

      for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
        pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
        pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

        if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
          pExprMsg->arg[j].argValue.pz = pMsg;
          pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
        } else {
          pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
        }
      }

      int16_t functionId = pExprMsg->functionId;
      if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
        if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {  // ignore the column  index check for arithmetic expression.
          code = TSDB_CODE_QRY_INVALID_MSG;
          goto _cleanup;
        }
      }

      pExprMsg = (SSqlFuncMsg *)pMsg;
    }
  }

H
hjxilinx 已提交
6273
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
6274

H
hjxilinx 已提交
6275
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
6276
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
6277 6278 6279 6280
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
6281 6282

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
D
fix bug  
dapan1121 已提交
6283
      (*groupbyCols)[i].colId = htons(*(int16_t *)pMsg);
6284
      pMsg += sizeof((*groupbyCols)[i].colId);
6285

D
fix bug  
dapan1121 已提交
6286
      (*groupbyCols)[i].colIndex = htons(*(int16_t *)pMsg);
6287 6288
      pMsg += sizeof((*groupbyCols)[i].colIndex);

D
fix bug  
dapan1121 已提交
6289
      (*groupbyCols)[i].flag = htons(*(int16_t *)pMsg);
6290 6291 6292 6293 6294
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
6295

H
hjxilinx 已提交
6296 6297
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
6298 6299
  }

6300 6301
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
6302
    pQueryMsg->fillVal = (uint64_t)(pMsg);
6303 6304

    int64_t *v = (int64_t *)pMsg;
6305
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
6306 6307
      v[i] = htobe64(v[i]);
    }
6308

6309
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
6310
  }
6311

6312 6313
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
H
Haojun Liao 已提交
6314 6315 6316 6317 6318
    if (*tagCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

6319 6320
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
6321

6322 6323 6324 6325
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
6326

6327
      (*tagCols)[i] = *pTagCol;
6328
      pMsg += sizeof(SColumnInfo);
6329
    }
H
hjxilinx 已提交
6330
  }
6331

6332 6333 6334
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
H
Haojun Liao 已提交
6335 6336 6337 6338 6339 6340

    if (*tagCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;

    }
6341 6342 6343
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
6344

H
Haojun Liao 已提交
6345 6346
  if (pQueryMsg->tbnameCondLen > 0) {
    *tbnameCond = calloc(1, pQueryMsg->tbnameCondLen + 1);
6347 6348 6349 6350 6351
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

H
Haojun Liao 已提交
6352 6353
    strncpy(*tbnameCond, pMsg, pQueryMsg->tbnameCondLen);
    pMsg += pQueryMsg->tbnameCondLen;
6354 6355 6356 6357 6358 6359 6360
  }

  *sql = strndup(pMsg, pQueryMsg->sqlstrLen);

  if (!validateQuerySourceCols(pQueryMsg, *pExpr, *tagCols)) {
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
weixin_48148422's avatar
weixin_48148422 已提交
6361
  }
6362

6363
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
6364 6365
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
6366
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval,
H
Haojun Liao 已提交
6367
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
6368

6369
  qDebug("qmsg:%p, sql:%s", pQueryMsg, *sql);
6370
  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
6371 6372

_cleanup:
S
TD-1848  
Shengliang Guan 已提交
6373
  tfree(*pExpr);
dengyihao's avatar
dengyihao 已提交
6374 6375
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
TD-1848  
Shengliang Guan 已提交
6376 6377 6378 6379
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
6380
  tfree(*sql);
6381 6382

  return code;
6383 6384
}

H
Haojun Liao 已提交
6385 6386
/*
 * Build the expression tree of an arithmetic expression from its binary form.
 *
 * The binary form is the first argument of the expression (arg[0]); on success the tree is
 * stored in pArithExprInfo->pExpr.
 *
 * @param pArithExprInfo  expression info holding the binary argument and receiving the tree
 * @param pQueryMsg       query message, only used as a tag in the log output
 * @return TSDB_CODE_SUCCESS; the code caught from exprTreeFromBinary() when it raises one;
 *         TSDB_CODE_QRY_APP_ERROR when no tree was produced
 */
static int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary", pQueryMsg);

  // set once before TRY and never modified afterwards
  char *exprBinary = pArithExprInfo->base.arg[0].argValue.pz;

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(exprBinary, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, exprBinary, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, exprBinary);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
6406
static int32_t createQueryFuncExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
6407 6408
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
6409
  int32_t code = TSDB_CODE_SUCCESS;
6410

H
Haojun Liao 已提交
6411
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
6412
  if (pExprs == NULL) {
6413
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
6414 6415 6416 6417 6418
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

H
Haojun Liao 已提交
6419
  for (int32_t i = 0; i < numOfOutput; ++i) {
6420
    pExprs[i].base = *pExprMsg[i];
6421
    pExprs[i].bytes = 0;
6422 6423 6424 6425

    int16_t type = 0;
    int16_t bytes = 0;

6426
    // parse the arithmetic expression
6427
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
H
Haojun Liao 已提交
6428
      code = buildArithmeticExprFromMsg(&pExprs[i], pQueryMsg);
6429

6430
      if (code != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6431
        tfree(pExprs);
6432
        return code;
6433 6434
      }

6435
      type  = TSDB_DATA_TYPE_DOUBLE;
H
Haojun Liao 已提交
6436
      bytes = tDataTypes[type].bytes;
H
Haojun Liao 已提交
6437
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
6438
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6439
      type = s.type;
H
Haojun Liao 已提交
6440
      bytes = s.bytes;
Y
yihaoDeng 已提交
6441 6442 6443 6444
    } else if (pExprs[i].base.colInfo.colId == TSDB_BLOCK_DIST_COLUMN_INDEX) {
      SSchema s = tGetBlockDistColumnSchema(); 
      type = s.type;
      bytes = s.bytes;
6445 6446
    } else if (pExprs[i].base.colInfo.colId <= TSDB_UD_COLUMN_INDEX) {
      // it is a user-defined constant value column
H
Haojun Liao 已提交
6447 6448
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

6449 6450
      type = pExprs[i].base.arg[1].argType;
      bytes = pExprs[i].base.arg[1].argBytes;
H
Haojun Liao 已提交
6451 6452 6453 6454 6455

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
6456
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6457
      if (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag)) {
Y
yihaoDeng 已提交
6458
        if (j < TSDB_BLOCK_DIST_COLUMN_INDEX || j >= pQueryMsg->numOfTags) {
6459 6460 6461 6462 6463 6464 6465
          return TSDB_CODE_QRY_INVALID_MSG;
        }
      } else {
        if (j < PRIMARYKEY_TIMESTAMP_COL_INDEX || j >= pQueryMsg->numOfCols) {
          return TSDB_CODE_QRY_INVALID_MSG;
        }
      }
H
Haojun Liao 已提交
6466

dengyihao's avatar
dengyihao 已提交
6467
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
6468 6469 6470 6471
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
6472
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
6473

H
Haojun Liao 已提交
6474 6475 6476
        type  = s.type;
        bytes = s.bytes;
      }
6477 6478
    }

S
TD-1057  
Shengliang Guan 已提交
6479
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
6480
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
6481
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
TD-1848  
Shengliang Guan 已提交
6482
      tfree(pExprs);
6483
      return TSDB_CODE_QRY_INVALID_MSG;
6484 6485
    }

6486
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
6487
      tagLen += pExprs[i].bytes;
6488
    }
6489

6490
    assert(isValidDataType(pExprs[i].type));
6491 6492 6493
  }

  // TODO refactor
H
Haojun Liao 已提交
6494
  for (int32_t i = 0; i < numOfOutput; ++i) {
6495 6496
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
6497

6498
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
6499
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
6500 6501 6502 6503 6504 6505 6506 6507 6508
      if (j < 0 || j >= pQueryMsg->numOfCols) {
        assert(0);
      } else {
        SColumnInfo *pCol = &pQueryMsg->colList[j];
        int32_t ret =
            getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
                              &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
        assert(ret == TSDB_CODE_SUCCESS);
      }
6509 6510 6511
    }
  }

6512
  *pExprInfo = pExprs;
6513 6514 6515
  return TSDB_CODE_SUCCESS;
}

6516
/**
 * Build the group-by descriptor of a query from the decoded query message.
 *
 * @param pQueryMsg  query message; numOfGroupCols, orderType and orderByIdx are read from it
 * @param pColIndex  array holding at least numOfGroupCols column indexes; each entry is copied
 * @param code       set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails, untouched otherwise
 * @return a newly allocated descriptor, or NULL when the message has no group-by columns
 *         or when an allocation failed (the caller tells the two apart through *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; release the half-built descriptor instead
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

6540
/**
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo entry for every column in
 * pQuery->colList that carries at least one filter.
 *
 * @param pQInfo  query handle, used only as an identifier in log messages
 * @param pQuery  query whose colList is scanned; numOfFilterCols and pFilterInfo are written
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure, or
 *         TSDB_CODE_QRY_INVALID_MSG when a filter has no usable relational operator.
 *         On a failure after pFilterInfo has been allocated, the partially filled array is
 *         left in pQuery for the caller's cleanup.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array so that cleanup code
    // iterating over numOfFilterCols never indexes a NULL pFilterInfo
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];
    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;
      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      pSingleColFilter->fp = getFilterOperator(lower, upper);
      if (pSingleColFilter->fp == NULL) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      pSingleColFilter->bytes = pQuery->colList[i].bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

6596
/**
 * For every output expression in pQuery->pExpr1, rewrite colInfo.colIndex so that it is the
 * position of the referenced column id inside pQuery->colList (normal columns) or inside
 * pQuery->tagColList (tag columns). Arithmetic expressions and user-defined constant
 * columns are left untouched.
 *
 * @param pQuery  query whose expression list is updated in place; must not be NULL
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // validate the handle itself before reading any of its members
  assert(pQuery != NULL && pQuery->pExpr1 != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pExpr1[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column referenced by an expression must be part of the column list
      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId <= TSDB_UD_COLUMN_INDEX) {
      // do nothing for user-defined constant value result columns
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name and block-distribution pseudo columns are not listed among the tags
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX || pColIndex->colId == TSDB_BLOCK_DIST_COLUMN_INDEX);
    }
  }
}

dengyihao's avatar
dengyihao 已提交
6633 6634
// Forward declaration: the definition appears later in this file, but createQInfoImpl
// below already needs it for its failure path.
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
6635 6636 6637
/**
 * Choose the capacity (in rows) of the output buffer and the fill threshold at which
 * results are considered ready, and store both in pQuery->rec.
 *
 * Projection queries get at least RESULT_MSG_MIN_ROWS rows, or as many rows as fit in
 * RESULT_MSG_MIN_SIZE bytes when that is more; every other query uses a fixed 4096 rows.
 * The threshold is 85% of the capacity in both cases.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  if (!isProjQuery(pQuery)) {
    // in case of non-prj query, a smaller output buffer will be used.
    pQuery->rec.capacity = 4096;
    pQuery->rec.threshold = (int32_t)(pQuery->rec.capacity * RESULT_THRESHOLD_RATIO);
    return;
  }

  int32_t rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
  rows = (rows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : rows;

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

6654
/**
 * Allocate and populate a query handle (SQInfo) together with its SQuery from the decoded
 * query message and the helper objects prepared by the caller.
 *
 * @param pQueryMsg        decoded query message (column list, limits, window, fill values, ...)
 * @param pGroupbyExpr     group-by descriptor, may be NULL; stored in the new SQuery
 * @param pExprs           first-stage output expressions (pQueryMsg->numOfOutput entries)
 * @param pSecExprs        second-stage output expressions, may be NULL
 * @param pTableGroupInfo  tables taking part in the query; copied by value into the handle
 * @param pTagCols         tag column descriptors; stored in the new SQuery
 * @param stableQuery      forwarded to changeExecuteScanOrder
 * @param sql              SQL text; stored in the handle on success
 * @return the new handle, or NULL on failure. On failure the objects already attached to
 *         the handle are released through freeQInfo; the two earliest failure paths release
 *         pGroupbyExpr, pTagCols and pExprs directly.
 *
 * NOTE(review): pSecExprs and sql do not appear to be released on the two earliest failure
 * paths, and sql is only attached to the handle late in this function - confirm whether the
 * caller is expected to cover those cases.
 */
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
                               SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, bool stableQuery, char* sql) {
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
    goto _cleanup_qinfo;
  }

  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  if (pQuery == NULL) {
    goto _cleanup_query;
  }

  pQInfo->runtimeEnv.pQuery = pQuery;

  pQuery->numOfCols       = numOfCols;
  pQuery->numOfOutput     = numOfOutput;
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
  pQuery->order.orderColId = pQueryMsg->orderColId;
  pQuery->pExpr1          = pExprs;
  pQuery->pExpr2          = pSecExprs;
  pQuery->numOfExpr2      = pQueryMsg->secondStageOutput;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  memcpy(&pQuery->interval, &pQueryMsg->interval, sizeof(pQuery->interval));
  pQuery->fillType        = pQueryMsg->fillType;
  pQuery->numOfTags       = pQueryMsg->numOfTags;
  pQuery->tagColList      = pTagCols;
  pQuery->prjInfo.vgroupLimit = pQueryMsg->vgroupLimit;
  pQuery->prjInfo.ts      = (pQueryMsg->order == TSDB_ORDER_ASC)? INT64_MIN:INT64_MAX;

  // colList holds SColumnInfo elements, so size the allocation by that type
  pQuery->colList = calloc(numOfCols, sizeof(SColumnInfo));
  if (pQuery->colList == NULL) {
    goto _cleanup;
  }

  int32_t srcSize = 0;
  for (int16_t i = 0; i < numOfCols; ++i) {
    pQuery->colList[i] = pQueryMsg->colList[i];
    // the handle gets its own copy of the filters; the message keeps the originals
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
    srcSize += pQuery->colList[i].bytes;
  }

  // calculate the result row size
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
  }

  doUpdateExprColumnIndex(pQuery);

  int32_t ret = createFilterInfo(pQInfo, pQuery);
  if (ret != TSDB_CODE_SUCCESS) {
    goto _cleanup;
  }

  // prepare the result buffer
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pQuery->sdata == NULL) {
    goto _cleanup;
  }

  calResultBufSize(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // allocate additional memory for interResults that are usually larger then final results
    // TODO refactor
    int16_t bytes = 0;
    // pExpr2 has numOfExpr2 entries, so index numOfExpr2 itself is already out of range
    if (pQuery->pExpr2 == NULL || col >= pQuery->numOfExpr2) {
      bytes = pExprs[col].bytes;
    } else {
      bytes = MAX(pQuery->pExpr2[col].bytes, pExprs[col].bytes);
    }

    size_t size = (size_t)((pQuery->rec.capacity + 1) * bytes + pExprs[col].interBytes + sizeof(tFilePage));
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
    if (pQuery->sdata[col] == NULL) {
      goto _cleanup;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE) {
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
      goto _cleanup;
    }

    // the first column is the timestamp
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
  }

  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
  }

  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
  pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo));

  pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW);
  pQInfo->runtimeEnv.pool = initResultRowPool(getResultRowSize(&pQInfo->runtimeEnv));
  pQInfo->runtimeEnv.prevRow = malloc(POINTER_BYTES * pQuery->numOfCols + srcSize);
  if (pQInfo->runtimeEnv.keyBuf == NULL || pQInfo->runtimeEnv.prevRow == NULL) {
    // prevRow is written right below; bail out instead of dereferencing NULL
    goto _cleanup;
  }

  // prevRow layout: numOfCols pointers, followed by one data slot per column
  char* start = POINTER_BYTES * pQuery->numOfCols + (char*) pQInfo->runtimeEnv.prevRow;
  pQInfo->runtimeEnv.prevRow[0] = start;

  for(int32_t i = 1; i < pQuery->numOfCols; ++i) {
    pQInfo->runtimeEnv.prevRow[i] = pQInfo->runtimeEnv.prevRow[i - 1] + pQuery->colList[i-1].bytes;
  }

  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  if (pQInfo->pBuf == NULL) {
    goto _cleanup;
  }

  // NOTE: pTableCheckInfo need to update the query time range and the lastKey info
  pQInfo->arrTableIdInfo = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pQInfo->rspContext = NULL;
  pQInfo->sql = sql;
  pthread_mutex_init(&pQInfo->lock, NULL);
  tsem_init(&pQInfo->ready, 0, 0);

  pQuery->pos = -1;
  pQuery->window = pQueryMsg->window;
  changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery);

  pQInfo->runtimeEnv.queryWindowIdentical = true;
  STimeWindow window = pQuery->window;

  // index of the next free STableQueryInfo slot inside pQInfo->pBuf
  int32_t index = 0;
  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    size_t s = taosArrayGetSize(pa);
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
    if (p1 == NULL) {
      goto _cleanup;
    }

    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);

    for(int32_t j = 0; j < s; ++j) {
      STableKeyInfo* info = taosArrayGet(pa, j);

      window.skey = info->lastKey;
      if (info->lastKey != pQuery->window.skey) {
        pQInfo->runtimeEnv.queryWindowIdentical = false;
      }

      void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
      if (item == NULL) {
        goto _cleanup;
      }

      item->groupIndex = i;
      taosArrayPush(p1, &item);

      STableId* id = TSDB_TABLEID(info->pTable);
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
    }
  }
  colIdCheck(pQuery);

  pQInfo->runtimeEnv.queryBlockDist = (numOfOutput == 1 && pExprs[0].base.colInfo.colId == TSDB_BLOCK_DIST_COLUMN_INDEX);

  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
  return pQInfo;

_cleanup_qinfo:
  // no handle exists yet, so the table group still belongs to the caller's struct
  tsdbDestroyTableGroup(pTableGroupInfo);

_cleanup_query:
  // nothing has been attached to an SQuery yet: release the inputs directly
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }

  tfree(pExprs);

_cleanup:
  // releases everything reachable from the handle; a no-op when pQInfo is NULL
  freeQInfo(pQInfo);
  return NULL;
}

H
hjxilinx 已提交
6862
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6863 6864 6865 6866
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6867

H
hjxilinx 已提交
6868 6869 6870 6871
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6872
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6873 6874 6875
  return (sig == (uint64_t)pQInfo);
}

6876
/**
 * Second initialization stage of a query handle: set the time precision, short-circuit
 * queries that cannot produce a result, build the optional timestamp buffer from the
 * message and hand everything to doInitQInfo.
 *
 * @param pQueryMsg  decoded query message; the ts* fields describe the embedded ts blocks
 * @param tsdb       storage handle, used for the precision lookup and passed to doInitQInfo
 * @param vgId       passed to the timestamp buffer and to doInitQInfo
 * @param pQInfo     handle created by createQInfoImpl
 * @param isSTable   passed to doInitQInfo
 * @return TSDB_CODE_SUCCESS (also for the "nothing to do" cases, in which the query status
 *         is set to QUERY_COMPLETED), or the error of doInitQInfo, in which case pQInfo has
 *         been freed here.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  // Build the timestamp buffer only once it is certain that the query will run: the
  // early returns above never hand it to anyone, so creating it earlier would leak it.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) { // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);

    UNUSED(ret);
  }

  // filter the qualified
  int32_t code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decrease during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

B
Bomin Zhang 已提交
6921
/**
 * Release an array of column filters, including the buffer referenced by pz for every
 * entry whose filterstr flag is set.
 *
 * @param pFilter       filter array, may be NULL
 * @param numOfFilters  number of entries; nothing is released when it is 0
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL && numOfFilters != 0) {
    int32_t idx = 0;
    while (idx < numOfFilters) {
      SColumnFilterInfo* pEntry = &pFilter[idx];
      if (pEntry->filterstr) {
        free((void*)(pEntry->pz));
      }

      idx += 1;
    }

    free(pFilter);
  }
}

H
Haojun Liao 已提交
6935 6936
/**
 * Destroy every STableQueryInfo referenced from the group list, then the per-group arrays,
 * the group list itself and the lookup map, and reset the structure to an empty state.
 *
 * @param pTableqinfoGroupInfo  group info to tear down; its pGroupList may be NULL
 */
static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo) {
  SArray* pGroups = pTableqinfoGroupInfo->pGroupList;

  if (pGroups != NULL) {
    int32_t groupCount = (int32_t) taosArrayGetSize(pGroups);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *pTables = taosArrayGetP(pGroups, g);
      size_t  tableCount = taosArrayGetSize(pTables);

      for (int32_t t = 0; t < tableCount; ++t) {
        destroyTableQueryInfoImpl((STableQueryInfo*) taosArrayGetP(pTables, t));
      }

      taosArrayDestroy(pTables);
    }
  }

  taosArrayDestroy(pGroups);
  taosHashCleanup(pTableqinfoGroupInfo->map);

  pTableqinfoGroupInfo->numOfTables = 0;
  pTableqinfoGroupInfo->map = NULL;
  pTableqinfoGroupInfo->pGroupList = NULL;
}

H
Haojun Liao 已提交
6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974
/**
 * Release an expression array: the expression node attached to each entry, if any, and
 * then the array itself.
 *
 * @param pExprInfo  expression array, may be NULL (numOfExpr must then be 0)
 * @param numOfExpr  number of entries in pExprInfo
 * @return always NULL, so that the caller can reset its pointer in the same statement
 */
static void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr) {
  if (pExprInfo != NULL) {
    for (int32_t idx = 0; idx < numOfExpr; ++idx) {
      SExprInfo* pEntry = &pExprInfo[idx];
      if (pEntry->pExpr != NULL) {
        tExprNodeDestroy(pEntry->pExpr, NULL);
      }
    }

    tfree(pExprInfo);
  } else {
    assert(numOfExpr == 0);
  }

  return NULL;
}

H
hjxilinx 已提交
6975 6976 6977 6978
/**
 * Release a query handle and everything it owns: the runtime environment, the SQuery with
 * its result buffers, filters, expressions and column lists, the per-table query state,
 * the table group and the handle itself. Handles that do not pass isValidQInfo (NULL or
 * signature mismatch) are ignored. The signature is cleared before the memory is released.
 *
 * The function tolerates partially constructed handles, which is how the failure paths of
 * createQInfoImpl and initQInfo use it.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables);

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (pQuery != NULL) {
    if (pQuery->sdata != NULL) {
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        tfree(pQuery->sdata[col]);
      }
      tfree(pQuery->sdata);
    }

    tfree(pQuery->fillVal);

    // numOfFilterCols may already be set while the array allocation failed,
    // so never walk the filter array without checking that it exists
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          tfree(pColFilter->pFilters);
        }
      }
    }

    pQuery->pExpr1 = destroyQueryFuncExpr(pQuery->pExpr1, pQuery->numOfOutput);
    pQuery->pExpr2 = destroyQueryFuncExpr(pQuery->pExpr2, pQuery->numOfExpr2);

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo *column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      tfree(pQuery->colList);
    }

    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery);
  }

  doDestroyTableQueryInfo(&pQInfo->tableqinfoGroupInfo);

  tfree(pQInfo->pBuf);
  tfree(pQInfo->sql);

  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosHashCleanup(pQInfo->arrTableIdInfo);

  taosArrayDestroy(pQInfo->groupResInfo.pRows);

  // invalidate the handle so that a stale pointer no longer passes isValidQInfo
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  tfree(pQInfo);
}

H
hjxilinx 已提交
7045
/**
 * Compute the number of payload bytes of the current result set.
 *
 * For a ts-comp query with a positive row count the payload is the file referenced from
 * the first result column: its size is returned and also written to *numOfRows.
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  in: number of result rows; out: overwritten with the file size for
 *                   ts-comp queries
 * @return size in bytes, or 0 when the ts-comp file cannot be inspected
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return (size_t)(pQuery->rowSize * (*numOfRows));
  }

  struct stat fileStat;
  FILE *pFile = *(FILE **)pQuery->sdata[0]->data;
  if ((pFile == NULL) || (fstat(fileno(pFile), &fileStat) != 0)) {
    qError("QInfo:%p failed to get file info, file:%p, reason:%s", pQInfo, pFile, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
7067

H
hjxilinx 已提交
7068 7069 7070
/**
 * Copy the current result set into the response buffer and update the running totals.
 *
 * For a ts-comp query the result is a file referenced from the first result column: its
 * whole content is read into data, the file is closed and the reference is cleared.
 * For every other query the rows are copied by doCopyQueryResultToMsg.
 *
 * @param pQInfo  query handle
 * @param data    destination buffer; the caller must have sized it for the whole result
 * @return always TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    FILE *f = *(FILE **)pQuery->sdata[0]->data;  // TODO refactor

    // make sure file exist
    if (f) {
      off_t s = lseek(fileno(f), 0, SEEK_END);

      qDebug("QInfo:%p ts comp data return, file:%p, size:%"PRId64, pQInfo, f, (int64_t)s);

      // lseek reports failure with a negative value; never pass that on as a read length
      if (s > 0 && fseek(f, 0, SEEK_SET) >= 0) {
        size_t expected = (size_t)s;
        size_t sz = fread(data, 1, expected, f);
        if (sz < expected) {  // todo handle error
          assert(0);
        }
      }

      fclose(f);
      *(FILE **)pQuery->sdata[0]->data = NULL;
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

7114 7115 7116 7117 7118 7119 7120
// Bookkeeping object around a cache of query handles.
// NOTE(review): presumably one instance exists per vnode - confirm against the code that
// creates it, which is outside this part of the file.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the owning vnode group - TODO confirm
  bool            closed;         // presumably set once the manager is shut down - TODO confirm
  pthread_mutex_t lock;           // presumably guards the fields above - TODO confirm
} SQueryMgmt;

7121
/**
 * Create a query handle from a raw query message: decode the message, build the output
 * expressions and the group-by descriptor, resolve the tables to query, then create and
 * initialize the handle.
 *
 * @param tsdb       storage handle, must not be NULL
 * @param vgId       forwarded to initQInfo
 * @param pQueryMsg  query message, must not be NULL; the filters of its column list are
 *                   released before returning
 * @param pQInfo     out: the new handle on success, set to NULL on failure once handle
 *                   creation has been attempted
 * @return TSDB_CODE_SUCCESS or an error code. A message whose query type is neither a
 *         table query nor a (multi-)super-table query is rejected with
 *         TSDB_CODE_QRY_INVALID_MSG.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *sql          = NULL;
  char            *tagCond      = NULL;
  char            *tbnameCond   = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg     = NULL;
  SSqlFuncMsg    **pSecExprMsg  = NULL;
  SExprInfo       *pExprs       = NULL;
  SExprInfo       *pSecExprs    = NULL;

  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &pSecExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo, &sql);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->numOfOutput, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pSecExprMsg != NULL) {
    if ((code = createQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &pSecExprs, pSecExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    // the query type comes from the client message: reject an unknown value
    // instead of aborting (debug build) or running without any table (release build)
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  // NOTE(review): when the buffer check below fails, tableGroupInfo does not appear to be
  // released on the way out - confirm whether a tsdbDestroyTableGroup call is needed here.
  code = checkForQueryBuf(tableGroupInfo.numOfTables);
  if (code != TSDB_CODE_SUCCESS) {  // not enough query buffer, abort
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, pSecExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery, sql);

  // createQInfoImpl has taken over these objects, whether it succeeded or not;
  // clear the local references so that the common exit below does not free them again
  sql    = NULL;
  pExprs = NULL;
  pSecExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);
  free(sql);
  free(pExprs);
  free(pSecExprs);

  free(pExprMsg);
  free(pSecExprMsg);

  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // the handle has already been freed by initQInfo on failure, but *pQInfo may still
  // hold its stale address; clear it so the caller never sees a dangling handle
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
7272
/*
 * Destroy a query handle.
 *
 * Handles rejected by isValidQInfo() are silently ignored. For a valid handle
 * the completion is logged, the query cost summary is printed and the QInfo
 * is released through freeQInfo().
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pInfo)) {
    qDebug("QInfo:%p query completed", pInfo);
    queryCostStatis(pInfo);  // print the query cost summary before the handle is freed
    freeQInfo(pInfo);
  }
}

7283 7284 7285 7286 7287 7288
/*
 * Mark the query result as ready and give up ownership of the handle.
 *
 * Under pQInfo->lock: sets dataReady to QUERY_RESULT_READY, asks
 * needBuildResAfterQueryComplete() whether a response has to be built, and
 * clears pQInfo->owner (which must be the calling thread). After the lock is
 * dropped, the `ready` semaphore is posted.
 *
 * Returns the value reported by needBuildResAfterQueryComplete().
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool needRsp = needBuildResAfterQueryComplete(pQInfo);

  // The owner must be cleared inside the critical section: once the handle is
  // queued as a task again, another thread may run ahead of the current one.
  assert(pQInfo->owner == taosGetSelfPthreadId());
  pQInfo->owner = 0;

  pthread_mutex_unlock(&pQInfo->lock);

  // wakes up the waiter in the blocking retrieve model
  tsem_post(&pQInfo->ready);

  return needRsp;
}

7303
/*
 * Execute (or resume) the query bound to the given handle.
 *
 * The calling thread first claims the handle by atomically swapping its id
 * into pQInfo->owner; if another thread already owns it, the code is set to
 * TSDB_CODE_QRY_IN_EXEC and false is returned. In every other case the return
 * value is whatever doBuildResCheck() reports, which also releases ownership.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  // claim the handle for the current thread
  int64_t self = taosGetSelfPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  pQInfo->startExecTs = taosGetTimestampSec();

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pRuntimeEnv->pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // A non-zero value here means execution jumped back with an error code:
  // record it and hand the result to the client.
  int32_t jmpCode = setjmp(pRuntimeEnv->env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // dispatch to the executor that matches the query kind
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else if (pRuntimeEnv->queryBlockDist) {
    buildTableBlockDistResult(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

7363
/*
 * Check whether the result of a query can be built for a retrieve request.
 *
 * qinfo        query handle
 * buildRes     out: true when the caller should build the response now
 * pRspContext  response context stored in the handle when the result is not
 *              ready yet (always stored in the blocking model)
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the
 * code currently recorded in the handle.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qError("QInfo:%p invalid qhandle", pQInfo);
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:0x%08x", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // Blocking model: wait on the `ready` semaphore, then always build the response.
  if (tsRetrieveBlockingModel) {
    pQInfo->rspContext = pRspContext;
    tsem_wait(&pQInfo->ready);
    *buildRes = true;
    return pQInfo->code;
  }

  // Non-blocking model: inspect the ready flag under the handle lock.
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);
  assert(pQInfo->rspContext == NULL);

  bool resultReady = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = resultReady;

  if (resultReady) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%s", pQInfo, pQuery->rowSize,
           pQuery->rec.rows, tstrerror(pQInfo->code));
  } else {
    // remember the request so the response can be sent once the result is ready
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
    assert(pQInfo->rspContext != NULL);
  }

  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
7407

7408
/*
 * Allocate and fill the retrieve response for a query.
 *
 * qinfo         query handle
 * pRsp          out: response buffer obtained from rpcMallocCont()
 * contLen       out: total length of the response in bytes
 * continueExec  out: true when more results remain to be produced
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle,
 * TSDB_CODE_QRY_OUT_OF_MEMORY when the response cannot be allocated, and the
 * code recorded in the handle otherwise.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  // payload = result data + one int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t payload = getResultSize(pQInfo, &pQuery->rec.rows);
  payload += sizeof(int32_t);
  payload += sizeof(STableIdInfo) * taosHashGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payload + sizeof(SRetrieveTableRsp));

  // todo: handle allocation failure properly; for now this only avoids a crash
  // and cannot return an error code to the client
  SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  *pRsp = rsp;
  if (rsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  bool succeeded = (pQInfo->code == TSDB_CODE_SUCCESS);

  rsp->numOfRows = htonl((int32_t)pQuery->rec.rows);
  rsp->offset    = succeeded ? htobe64(pQuery->limit.offset) : 0;
  rsp->useconds  = htobe64(pRuntimeEnv->summary.elapsedTime);
  rsp->precision = htons(pQuery->precision);

  if (succeeded && pQuery->rec.rows > 0) {
    doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    *continueExec = false;
    rsp->completed = 1;  // notify the client that no more results follow
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results to retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
7462

7463 7464 7465 7466 7467 7468 7469 7470
/*
 * Report whether a query has finished.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1
 * when the query is killed or its status has QUERY_OVER set, and 0 if not.
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (isQueryKilled(pQInfo)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pQuery->status, QUERY_OVER) ? 1 : 0;
}

H
Haojun Liao 已提交
7474
/*
 * Mark a query as killed and wait until no thread owns the handle any more.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle and
 * TSDB_CODE_SUCCESS once pQInfo->owner has been observed as 0.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo != NULL && isValidQInfo(pQInfo)) {
    setQueryKilled(pQInfo);

    // Wait for the executing thread to stop. Once the query has stopped, the
    // owner of the handle is cleared, which ends this polling loop.
    for (; pQInfo->owner != 0; ) {
      taosMsleep(100);
    }

    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507
/*
 * Copy one tag value into the result buffer.
 *
 * output  destination inside the result buffer
 * val     source value, or NULL to write the NULL representation of `type`
 * type    TSDB data type of the value
 * bytes   width of the value; used for every type except BINARY/NCHAR, whose
 *         length is taken from the value itself via varDataTLen()
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  // todo: the original notes that stopping here can crash the client
  if (isVarType) {
    memcpy(output, val, varDataTLen(val));
  } else {
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
7508 7509 7510
/*
 * Build the result of a query that only touches tags / table names.
 *
 * Three cases are handled, selected by the function id of the first output
 * expression:
 *   - TSDB_FUNC_TID_TAG: one var-length row per table holding a 2-byte zero,
 *     the table uid, the table tid, the vgId and then the table name or the
 *     tag value;
 *   - TSDB_FUNC_COUNT:   a single row with the number of tables (count(tbname));
 *   - anything else:     the requested tag / tbname columns, honoring
 *     limit/offset.
 *
 * pQInfo->tableIndex is advanced as tables are consumed, so repeated calls
 * continue where the previous one stopped. On return pQuery->rec.rows holds
 * the number of produced rows and the status is set to QUERY_COMPLETED.
 *
 * The fixed-width fields of the TID_TAG row are written with memcpy(): the
 * destination offsets inside the char buffer are not guaranteed to be
 * suitably aligned for int16_t/int32_t/int64_t stores.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pExpr1[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pExpr1[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    // default to the expression's type/width, but prefer the tag column definition when found
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // leading 2-byte zero field
      const int16_t zero = 0;
      memcpy(output, &zero, sizeof(zero));
      output += sizeof(int16_t);

      const int64_t uid = (int64_t)id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      const int32_t tid = (int32_t)id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      const int32_t vgId = (int32_t)pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    const int64_t numOfTables = (int64_t)num;
    memcpy(pQuery->sdata[0]->data, &numOfTables, sizeof(numOfTables));

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // never produce more rows than the result buffer capacity or the query limit
    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pExpr1;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // not assign value in case of user defined constant output column
        if (TSDB_COL_IS_UD_COL(pExprInfo[j].base.colInfo.flag)) {
          continue;
        }

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

H
Haojun Liao 已提交
7641
static int64_t getQuerySupportBufSize(size_t numOfTables) {
H
Haojun Liao 已提交
7642 7643 7644 7645
  size_t s1 = sizeof(STableQueryInfo);
  size_t s2 = sizeof(SHashNode);

//  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
H
Haojun Liao 已提交
7646
  return (int64_t)((s1 + s2) * 1.5 * numOfTables);
H
Haojun Liao 已提交
7647 7648
}

H
Haojun Liao 已提交
7649
/*
 * Reserve query-support buffer for `numOfTables` tables out of the global
 * tsQueryBufferSize budget.
 *
 *   tsQueryBufferSize <  0 : no limit, always succeeds;
 *   tsQueryBufferSize == 0 : query processing is disabled, always fails;
 *   tsQueryBufferSize >  0 : the estimated size is subtracted with a
 *                            compare-and-swap retry loop.
 *
 * Returns TSDB_CODE_SUCCESS or TSDB_CODE_QRY_NOT_ENOUGH_BUFFER.
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSize < 0) {
    return TSDB_CODE_SUCCESS;
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  if (!(tsQueryBufferSize > 0)) {
    return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
  }

  for (;;) {
    int64_t available = tsQueryBufferSize;
    int64_t left = available - required;

    if (left < 0) {
      return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
    }

    // retry when another thread changed the budget in the meantime
    if (atomic_val_compare_exchange_64(&tsQueryBufferSize, available, left) == available) {
      return TSDB_CODE_SUCCESS;
    }
  }
}

H
Haojun Liao 已提交
7672
/*
 * Give the buffer reserved by checkForQueryBuf() for `numOfTables` tables
 * back to the global tsQueryBufferSize budget.
 *
 * Nothing is done when the budget is negative (no limit is enforced, so
 * nothing was reserved). A budget of exactly 0 must still be restored:
 * checkForQueryBuf() accepts a reservation that leaves 0 bytes, and skipping
 * the release in that state would keep the budget at 0 and make every later
 * query fail with TSDB_CODE_QRY_NOT_ENOUGH_BUFFER.
 *
 * NOTE(review): with a configured budget of 0, checkForQueryBuf() rejects
 * every query, so presumably no reservation exists to be released in that
 * mode - confirm that no caller releases without a prior successful check.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSize < 0) {
    return;
  }

  int64_t t = getQuerySupportBufSize(numOfTables);

  // return the reserved amount to the shared budget
  atomic_add_fetch_64(&tsQueryBufferSize, t);
}

7683 7684 7685 7686 7687 7688 7689
/*
 * Return the response context currently stored in the query handle.
 * The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* rspContext = pInfo->rspContext;
  return rspContext;
}

7690 7691 7692 7693 7694 7695 7696
/*
 * Release callback registered with the qhandle cache (see qOpenQueryMgmt).
 * `qhandle` points at a slot holding the query handle; the query is killed
 * first and then destroyed. NULL slots and empty slots are ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** slot = qhandle;

  if (slot != NULL && *slot != NULL) {
    qKillQuery(*slot);
    qDestroyQueryInfo(*slot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
7701
  const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
7702 7703 7704 7705

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

7706
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
H
Haojun Liao 已提交
7707 7708 7709 7710
  if (pQueryMgmt == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }
7711

S
TD-1530  
Shengliang Guan 已提交
7712
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
7713 7714 7715 7716
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
7717 7718

  qDebug("vgId:%d, open querymgmt success", vgId);
7719
  return pQueryMgmt;
7720 7721
}

H
Haojun Liao 已提交
7722
/*
 * Callback passed to taosCacheRefresh(): `handle` points at a slot holding a
 * query handle, which is killed through qKillQuery().
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** slot = (void**)handle;
  void*  qinfo = *slot;

  qKillQuery(qinfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
7728 7729 7730 7731 7732 7733 7734
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

7735
  pthread_mutex_lock(&pQueryMgmt->lock);
7736
  pQueryMgmt->closed = true;
7737
  pthread_mutex_unlock(&pQueryMgmt->lock);
7738

H
Haojun Liao 已提交
7739
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
7740 7741
}

S
TD-2640  
Shengliang Guan 已提交
7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754
/*
 * Clear the closed flag of the query management object so that it accepts
 * query handles again. A NULL argument is ignored.
 */
void qQueryMgmtReOpen(void *pQMgmt) {
  SQueryMgmt *pMgmt = pQMgmt;

  if (pMgmt != NULL) {
    qDebug("vgId:%d, set querymgmt reopen", pMgmt->vgId);

    pthread_mutex_lock(&pMgmt->lock);
    pMgmt->closed = false;
    pthread_mutex_unlock(&pMgmt->lock);
  }
}

7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769
/*
 * Tear down a query management object: clean up its handle cache, destroy
 * its mutex and free the object itself. The object must already be flagged
 * as closed. A NULL argument is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  assert(pMgmt->closed);

  // remember the id for the final log line; the object is gone by then
  int32_t vgId = pMgmt->vgId;

  // detach the pool from the object before it is cleaned up
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  tfree(pMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}

7775
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
7776
  if (pMgmt == NULL) {
7777
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7778 7779 7780 7781 7782
    return NULL;
  }

  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
7783
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
7784
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7785 7786 7787
    return NULL;
  }

7788
  pthread_mutex_lock(&pQueryMgmt->lock);
7789
  if (pQueryMgmt->closed) {
7790
    pthread_mutex_unlock(&pQueryMgmt->lock);
7791
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
7792
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
7793 7794
    return NULL;
  } else {
S
TD-1530  
Shengliang Guan 已提交
7795
    TSDB_CACHE_PTR_TYPE handleVal = (TSDB_CACHE_PTR_TYPE) qInfo;
7796 7797
    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(TSDB_CACHE_PTR_TYPE), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE),
        (getMaximumIdleDurationSec()*1000));
7798
    pthread_mutex_unlock(&pQueryMgmt->lock);
7799 7800 7801 7802 7803

    return handle;
  }
}

S
TD-1530  
Shengliang Guan 已提交
7804
void** qAcquireQInfo(void* pMgmt, uint64_t _key) {
7805 7806
  SQueryMgmt *pQueryMgmt = pMgmt;

B
Bomin Zhang 已提交
7807 7808 7809 7810 7811 7812 7813
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7814 7815 7816
    return NULL;
  }

S
TD-1530  
Shengliang Guan 已提交
7817 7818
  TSDB_CACHE_PTR_TYPE key = (TSDB_CACHE_PTR_TYPE)_key;
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(TSDB_CACHE_PTR_TYPE));
7819
  if (handle == NULL || *handle == NULL) {
B
Bomin Zhang 已提交
7820
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
7821 7822 7823 7824 7825 7826
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
7827
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
7828 7829 7830 7831 7832
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
7833
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
7834
  return 0;
D
fix bug  
dapan1121 已提交
7835
}