tscLocalMerge.c 56.4 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

16
#include "os.h"
S
slguan 已提交
17
#include "tlosertree.h"
H
hzcheng 已提交
18
#include "tscUtil.h"
H
hjxilinx 已提交
19
#include "tschemautil.h"
S
slguan 已提交
20
#include "tsclient.h"
H
hzcheng 已提交
21
#include "tutil.h"
S
slguan 已提交
22
#include "tscLog.h"
23
#include "tscLocalMerge.h"
H
hzcheng 已提交
24 25

typedef struct SCompareParam {
S
slguan 已提交
26 27
  SLocalDataSource **pLocalData;
  tOrderDescriptor * pDesc;
28
  int32_t            num;
S
slguan 已提交
29
  int32_t            groupOrderType;
H
hzcheng 已提交
30 31 32 33 34 35
} SCompareParam;

int32_t treeComparator(const void *pLeft, const void *pRight, void *param) {
  int32_t pLeftIdx = *(int32_t *)pLeft;
  int32_t pRightIdx = *(int32_t *)pRight;

S
slguan 已提交
36 37 38
  SCompareParam *    pParam = (SCompareParam *)param;
  tOrderDescriptor * pDesc = pParam->pDesc;
  SLocalDataSource **pLocalData = pParam->pLocalData;
H
hzcheng 已提交
39 40 41 42 43 44 45 46 47 48

  /* this input is exhausted, set the special value to denote this */
  if (pLocalData[pLeftIdx]->rowIdx == -1) {
    return 1;
  }

  if (pLocalData[pRightIdx]->rowIdx == -1) {
    return -1;
  }

49
  if (pParam->groupOrderType == TSDB_ORDER_DESC) {  // desc
50 51
    return compare_d(pDesc, pParam->num, pLocalData[pLeftIdx]->rowIdx, pLocalData[pLeftIdx]->filePage.data,
                     pParam->num, pLocalData[pRightIdx]->rowIdx, pLocalData[pRightIdx]->filePage.data);
H
hzcheng 已提交
52
  } else {
53 54
    return compare_a(pDesc, pParam->num, pLocalData[pLeftIdx]->rowIdx, pLocalData[pLeftIdx]->filePage.data,
                     pParam->num, pLocalData[pRightIdx]->rowIdx, pLocalData[pRightIdx]->filePage.data);
H
hzcheng 已提交
55 56 57
  }
}

H
hjLiao 已提交
58
/*
 * Initialise one SQLFunctionCtx / SResultInfo pair per query expression for the
 * client-side merge stage.
 *
 * Input layout (offset, type, bytes) is taken from pDesc->pColumnModel, while the
 * output layout (type, bytes, field offset) is taken from the query expressions
 * of pCmd; the two may differ because of the merge requirement.
 *
 * After the per-expression setup, the contexts of TAG_DUMMY/TS_DUMMY expressions
 * are collected and attached to the last expression whose function has the
 * TSDB_FUNCSTATE_SELECTIVITY flag. If there is no such pair, or the list cannot
 * be allocated, no tag list is attached.
 */
static void tscInitSqlContext(SSqlCmd *pCmd, SLocalReducer *pReducer, tOrderDescriptor *pDesc) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  int32_t     numOfExprs = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t i = 0; i < numOfExprs; ++i) {
    SQLFunctionCtx *pCtx = &pReducer->pCtx[i];
    SSqlExpr *      pExpr = tscSqlExprGet(pQueryInfo, i);

    // each output column occupies "capacity" consecutive values in the result buffer
    pCtx->aOutputBuf =
        pReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pReducer->resColModel->capacity;
    pCtx->order = pQueryInfo->order.order;
    pCtx->functionId = pExpr->functionId;

    // input buffer hold only one point data
    int16_t  offset = getColumnModelOffset(pDesc->pColumnModel, i);
    SSchema *pSchema = getColumnModelSchema(pDesc->pColumnModel, i);

    pCtx->aInputElemBuf = pReducer->pTempBuffer->data + offset;

    // input data format comes from the column model of the order descriptor
    pCtx->inputType = pSchema->type;
    pCtx->inputBytes = pSchema->bytes;

    // output data format comes from the expression in pCmd
    pCtx->outputBytes = pExpr->resBytes;
    pCtx->outputType = pExpr->resType;

    pCtx->startOffset = 0;
    pCtx->size = 1;
    pCtx->hasNull = true;
    pCtx->currentStage = SECONDARY_STAGE_MERGE;

    // for top/bottom function, the output of timestamp is the first column
    int32_t functionId = pExpr->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pReducer->pCtx[0].aOutputBuf;
      pCtx->param[2].i64Key = pQueryInfo->order.order;
      pCtx->param[2].nType  = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[1].i64Key = pQueryInfo->order.orderColId;
    }

    SResultInfo *pResInfo = &pReducer->pResInfo[i];
    pResInfo->bufLen = pExpr->interBytes;
    pResInfo->interResultBuf = calloc(1, (size_t) pResInfo->bufLen);

    pCtx->resultInfo = &pReducer->pResInfo[i];
    pCtx->resultInfo->superTableQ = true;
  }

  int32_t numOfOutput = pQueryInfo->fieldsInfo.numOfOutput;
  if (numOfOutput <= 0) {
    return;  // no output fields, hence no tag contexts to bind
  }

  SQLFunctionCtx **pTagCtx = calloc(numOfOutput, POINTER_BYTES);
  if (pTagCtx == NULL) {
    // previously this NULL was dereferenced in the loop below
    tscError("failed to allocate tag context list, out of memory");
    return;
  }

  int16_t         n = 0;
  int16_t         tagLen = 0;
  SQLFunctionCtx *pCtx = NULL;

  for (int32_t i = 0; i < numOfOutput; ++i) {
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
    if (pExpr->functionId == TSDB_FUNC_TAG_DUMMY || pExpr->functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pExpr->resBytes;
      pTagCtx[n++] = &pReducer->pCtx[i];
    } else if ((aAggs[pExpr->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCtx = &pReducer->pCtx[i];
    }
  }

  if (n == 0 || pCtx == NULL) {
    free(pTagCtx);
  } else {
    // ownership of the list moves to the selectivity context
    pCtx->tagInfo.pTagCtxList = pTagCtx;
    pCtx->tagInfo.numOfTagCols = n;
    pCtx->tagInfo.tagsLen = tagLen;
  }
}

135 136 137 138 139 140 141 142 143 144 145 146 147
/*
 * Build the per-column fill description array for a query: one SFillColInfo per
 * expression, with column offsets accumulated from the expressions' result sizes.
 *
 * Returns a calloc'ed array that the caller hands on (see taosInitFillInfo call
 * site), or NULL when the array cannot be allocated.
 */
static SFillColInfo* createFillColInfo(SQueryInfo* pQueryInfo) {
  int32_t numOfCols = tscSqlExprNumOfExprs(pQueryInfo);

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;  // previously the NULL array was written to in the loop below
  }

  int32_t offset = 0;
  for (int32_t i = 0; i < numOfCols; ++i) {
    SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i);

    pFillCol[i].col.bytes  = pExpr->resBytes;
    pFillCol[i].col.type   = pExpr->resType;
    pFillCol[i].flag       = pExpr->colInfo.flag;
    pFillCol[i].col.offset = offset;
    pFillCol[i].functionId = pExpr->functionId;
    pFillCol[i].fillVal.i  = pQueryInfo->fillVal[i];

    offset += pExpr->resBytes;
  }

  return pFillCol;
}

H
hzcheng 已提交
155
void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc,
H
hjLiao 已提交
156 157 158 159
                           SColumnModel *finalmodel, SSqlObj* pSql) {
  SSqlCmd* pCmd = &pSql->cmd;
  SSqlRes* pRes = &pSql->res;
  
160
  if (pMemBuffer == NULL) {
H
hjLiao 已提交
161 162 163
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
  
    tscError("%p pMemBuffer is NULL", pMemBuffer);
164
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
165 166 167 168
    return;
  }
 
  if (pDesc->pColumnModel == NULL) {
H
hzcheng 已提交
169 170
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);

H
hjLiao 已提交
171
    tscError("%p no local buffer or intermediate result format model", pSql);
172
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
H
hzcheng 已提交
173 174 175 176 177 178 179
    return;
  }

  int32_t numOfFlush = 0;
  for (int32_t i = 0; i < numOfBuffer; ++i) {
    int32_t len = pMemBuffer[i]->fileMeta.flushoutData.nLength;
    if (len == 0) {
180
      tscDebug("%p no data retrieved from orderOfVnode:%d", pSql, i + 1);
H
hzcheng 已提交
181 182 183 184 185 186 187 188
      continue;
    }

    numOfFlush += len;
  }

  if (numOfFlush == 0 || numOfBuffer == 0) {
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
189
    tscDebug("%p retrieved no data", pSql);
S
slguan 已提交
190

H
hzcheng 已提交
191 192 193
    return;
  }

H
hjxilinx 已提交
194
  if (pDesc->pColumnModel->capacity >= pMemBuffer[0]->pageSize) {
H
hjLiao 已提交
195
    tscError("%p Invalid value of buffer capacity %d and page size %d ", pSql, pDesc->pColumnModel->capacity,
H
hjxilinx 已提交
196
             pMemBuffer[0]->pageSize);
S
slguan 已提交
197 198

    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
199
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
H
hzcheng 已提交
200 201 202
    return;
  }

H
hjLiao 已提交
203 204 205
  size_t size = sizeof(SLocalReducer) + POINTER_BYTES * numOfFlush;
  
  SLocalReducer *pReducer = (SLocalReducer *) calloc(1, size);
H
hzcheng 已提交
206
  if (pReducer == NULL) {
H
hjLiao 已提交
207
    tscError("%p failed to create local merge structure, out of memory", pSql);
S
slguan 已提交
208 209

    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
210
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
211 212 213 214
    return;
  }

  pReducer->pExtMemBuffer = pMemBuffer;
S
slguan 已提交
215
  pReducer->pLocalDataSrc = (SLocalDataSource **)&pReducer[1];
H
hzcheng 已提交
216 217 218 219
  assert(pReducer->pLocalDataSrc != NULL);

  pReducer->numOfBuffer = numOfFlush;
  pReducer->numOfVnode = numOfBuffer;
220

H
hzcheng 已提交
221
  pReducer->pDesc = pDesc;
222
  tscDebug("%p the number of merged leaves is: %d", pSql, pReducer->numOfBuffer);
H
hzcheng 已提交
223 224 225 226 227 228

  int32_t idx = 0;
  for (int32_t i = 0; i < numOfBuffer; ++i) {
    int32_t numOfFlushoutInFile = pMemBuffer[i]->fileMeta.flushoutData.nLength;

    for (int32_t j = 0; j < numOfFlushoutInFile; ++j) {
H
hjLiao 已提交
229 230 231
      SLocalDataSource *ds = (SLocalDataSource *)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->pageSize);
      if (ds == NULL) {
        tscError("%p failed to create merge structure", pSql);
232
        pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
B
Bomin Zhang 已提交
233
        tfree(pReducer);
H
hzcheng 已提交
234 235
        return;
      }
H
hjLiao 已提交
236 237
      
      pReducer->pLocalDataSrc[idx] = ds;
H
hzcheng 已提交
238

H
hjLiao 已提交
239 240
      ds->pMemBuffer = pMemBuffer[i];
      ds->flushoutIdx = j;
241
      ds->filePage.num = 0;
H
hjLiao 已提交
242 243
      ds->pageId = 0;
      ds->rowIdx = 0;
H
hzcheng 已提交
244

245
      tscDebug("%p load data from disk into memory, orderOfVnode:%d, total:%d", pSql, i + 1, idx + 1);
H
hjLiao 已提交
246
      tExtMemBufferLoadData(pMemBuffer[i], &(ds->filePage), j, 0);
H
hzcheng 已提交
247
#ifdef _DEBUG_VIEW
248
      printf("load data page into mem for build loser tree: %" PRIu64 " rows\n", ds->filePage.num);
H
hzcheng 已提交
249
      SSrcColumnInfo colInfo[256] = {0};
H
hjxilinx 已提交
250
      SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
251 252

      tscGetSrcColumnInfo(colInfo, pQueryInfo);
H
hzcheng 已提交
253

254
      tColModelDisplayEx(pDesc->pColumnModel, ds->filePage.data, ds->filePage.num,
H
hjxilinx 已提交
255
                         pMemBuffer[0]->numOfElemsPerPage, colInfo);
H
hzcheng 已提交
256
#endif
H
hjLiao 已提交
257
      
258
      if (ds->filePage.num == 0) {  // no data in this flush, the index does not increase
259
        tscDebug("%p flush data is empty, ignore %d flush record", pSql, idx);
H
hjLiao 已提交
260
        tfree(ds);
H
hzcheng 已提交
261 262
        continue;
      }
H
hjLiao 已提交
263
      
H
hzcheng 已提交
264 265 266
      idx += 1;
    }
  }
H
hjLiao 已提交
267 268
  
  // no data actually, no need to merge result.
H
hzcheng 已提交
269
  if (idx == 0) {
B
Bomin Zhang 已提交
270
    tfree(pReducer);
H
hzcheng 已提交
271 272 273 274 275 276 277 278
    return;
  }

  pReducer->numOfBuffer = idx;

  SCompareParam *param = malloc(sizeof(SCompareParam));
  param->pLocalData = pReducer->pLocalDataSrc;
  param->pDesc = pReducer->pDesc;
279
  param->num = pReducer->pLocalDataSrc[0]->pMemBuffer->numOfElemsPerPage;
H
hjxilinx 已提交
280 281
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

282
  param->groupOrderType = pQueryInfo->groupbyExpr.orderType;
H
Haojun Liao 已提交
283
  pReducer->orderPrjOnSTable = tscOrderedProjectionQueryOnSTable(pQueryInfo, 0);
H
hzcheng 已提交
284 285 286

  pRes->code = tLoserTreeCreate(&pReducer->pLoserTree, pReducer->numOfBuffer, param, treeComparator);
  if (pReducer->pLoserTree == NULL || pRes->code != 0) {
B
Bomin Zhang 已提交
287
    tfree(pReducer);
H
hzcheng 已提交
288 289 290 291 292
    return;
  }

  // the input data format follows the old format, but output in a new format.
  // so, all the input must be parsed as old format
H
hjLiao 已提交
293
  pReducer->pCtx = (SQLFunctionCtx *)calloc(tscSqlExprNumOfExprs(pQueryInfo), sizeof(SQLFunctionCtx));
H
hzcheng 已提交
294 295
  pReducer->rowSize = pMemBuffer[0]->nElemSize;

H
hjxilinx 已提交
296 297
  tscRestoreSQLFuncForSTableQuery(pQueryInfo);
  tscFieldInfoUpdateOffset(pQueryInfo);
H
hzcheng 已提交
298

H
hjxilinx 已提交
299
  if (pReducer->rowSize > pMemBuffer[0]->pageSize) {
H
hzcheng 已提交
300 301 302 303 304 305 306 307
    assert(false);  // todo fixed row size is larger than the minimum page size;
  }

  pReducer->hasPrevRow = false;
  pReducer->hasUnprocessedRow = false;

  pReducer->prevRowOfInput = (char *)calloc(1, pReducer->rowSize);

S
slguan 已提交
308
  // used to keep the latest input row
H
hzcheng 已提交
309 310 311 312
  pReducer->pTempBuffer = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage));
  pReducer->discardData = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage));
  pReducer->discard = false;

H
hjxilinx 已提交
313
  pReducer->nResultBufSize = pMemBuffer[0]->pageSize * 16;
H
hzcheng 已提交
314
  pReducer->pResultBuf = (tFilePage *)calloc(1, pReducer->nResultBufSize + sizeof(tFilePage));
H
hjxilinx 已提交
315

H
Haojun Liao 已提交
316
  pReducer->finalRowSize = tscGetResRowLength(pQueryInfo->exprList);
H
hzcheng 已提交
317
  pReducer->resColModel = finalmodel;
B
Bomin Zhang 已提交
318
  pReducer->resColModel->capacity = pReducer->nResultBufSize;
319
  assert(pReducer->finalRowSize > 0);
B
Bomin Zhang 已提交
320 321 322
  if (pReducer->finalRowSize > 0) {
    pReducer->resColModel->capacity /= pReducer->finalRowSize;
  }
H
Haojun Liao 已提交
323
  assert(pReducer->finalRowSize <= pReducer->rowSize);
H
hzcheng 已提交
324

H
hjxilinx 已提交
325
  pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->capacity);
326
//  pReducer->pBufForInterpo = calloc(1, pReducer->nResultBufSize);
H
hzcheng 已提交
327

H
hjxilinx 已提交
328
  if (pReducer->pTempBuffer == NULL || pReducer->discardData == NULL || pReducer->pResultBuf == NULL ||
329
      /*pReducer->pBufForInterpo == NULL || */pReducer->pFinalRes == NULL || pReducer->prevRowOfInput == NULL) {
S
slguan 已提交
330 331 332
    tfree(pReducer->pTempBuffer);
    tfree(pReducer->discardData);
    tfree(pReducer->pResultBuf);
H
hjxilinx 已提交
333
    tfree(pReducer->pFinalRes);
S
slguan 已提交
334
    tfree(pReducer->prevRowOfInput);
B
Bomin Zhang 已提交
335
    tfree(pReducer);
336
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
337 338
    return;
  }
H
hjLiao 已提交
339
  
340 341 342 343
  size_t numOfCols = tscSqlExprNumOfExprs(pQueryInfo);
  
  pReducer->pTempBuffer->num = 0;
  pReducer->pResInfo = calloc(numOfCols, sizeof(SResultInfo));
H
hzcheng 已提交
344

345
  tscCreateResPointerInfo(pRes, pQueryInfo);
H
hjLiao 已提交
346
  tscInitSqlContext(pCmd, pReducer, pDesc);
H
hzcheng 已提交
347

H
hjxilinx 已提交
348 349
  // we change the capacity of schema to denote that there is only one row in temp buffer
  pReducer->pDesc->pColumnModel->capacity = 1;
H
hjxilinx 已提交
350 351

  // restore the limitation value at the last stage
352 353 354 355
  if (tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) {
    pQueryInfo->limit.limit = pQueryInfo->clauseLimit;
    pQueryInfo->limit.offset = pQueryInfo->prjOffset;
  }
H
hjxilinx 已提交
356

357
  pReducer->offset = pQueryInfo->limit.offset;
H
hjxilinx 已提交
358

H
hzcheng 已提交
359 360 361
  pRes->pLocalReducer = pReducer;
  pRes->numOfGroups = 0;

362
  STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0);
363
  STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);
H
hjxilinx 已提交
364
  
365
  TSKEY stime = MIN(pQueryInfo->window.skey, pQueryInfo->window.ekey);
H
hjxilinx 已提交
366
  int64_t revisedSTime =
367 368 369 370 371
      taosGetIntervalStartTimestamp(stime, pQueryInfo->intervalTime, pQueryInfo->slidingTimeUnit, tinfo.precision);
  
  if (pQueryInfo->fillType != TSDB_FILL_NONE) {
    SFillColInfo* pFillCol = createFillColInfo(pQueryInfo);
    pReducer->pFillInfo = taosInitFillInfo(pQueryInfo->order.order, revisedSTime, pQueryInfo->groupbyExpr.numOfGroupCols,
H
Haojun Liao 已提交
372 373
                                           4096, numOfCols, pQueryInfo->slidingTime, pQueryInfo->slidingTimeUnit,
                                           tinfo.precision, pQueryInfo->fillType, pFillCol);
374
  }
H
hzcheng 已提交
375

H
hjxilinx 已提交
376
  int32_t startIndex = pQueryInfo->fieldsInfo.numOfOutput - pQueryInfo->groupbyExpr.numOfGroupCols;
H
hzcheng 已提交
377

H
Haojun Liao 已提交
378
  if (pQueryInfo->groupbyExpr.numOfGroupCols > 0 && pReducer->pFillInfo != NULL) {
379
    pReducer->pFillInfo->pTags[0] = (char *)pReducer->pFillInfo->pTags + POINTER_BYTES * pQueryInfo->groupbyExpr.numOfGroupCols;
380
    for (int32_t i = 1; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) {
H
hjxilinx 已提交
381
      SSchema *pSchema = getColumnModelSchema(pReducer->resColModel, startIndex + i - 1);
382
      pReducer->pFillInfo->pTags[i] = pSchema->bytes + pReducer->pFillInfo->pTags[i - 1];
H
hzcheng 已提交
383 384
    }
  } else {
385 386 387
    if (pReducer->pFillInfo != NULL) {
      assert(pReducer->pFillInfo->pTags == NULL);
    }
H
hzcheng 已提交
388 389 390 391 392
  }
}

/*
 * Sort the rows currently held in pPage (when the descriptor has order columns)
 * and append them to the external memory buffer.
 *
 * Returns 0 on success (pPage is then empty, also when it was empty on entry)
 * and -1 when the rows cannot be stored in pMemoryBuf.
 */
static int32_t tscFlushTmpBufferImpl(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage,
                                     int32_t orderType) {
  // empty page: nothing to sort or store
  if (pPage->num == 0) {
    return 0;
  }

  assert(pPage->num <= pDesc->pColumnModel->capacity);

  // the rows must be stored consecutively in the page for the in-place sort
  if (pDesc->orderInfo.numOfCols > 0) {
    tColDataQSort(pDesc, pPage->num, 0, pPage->num - 1, pPage->data, orderType);
  }

#ifdef _DEBUG_VIEW
  printf("%" PRIu64 " rows data flushed to disk after been sorted:\n", pPage->num);
  tColModelDisplay(pDesc->pColumnModel, pPage->data, pPage->num, pPage->num);
#endif

  // hand the sorted rows over to the external buffer, then reset the page
  if (tExtMemBufferPut(pMemoryBuf, pPage->data, pPage->num) >= 0) {
    pPage->num = 0;
    return 0;
  }

  tscError("failed to save data in temporary buffer");
  return -1;
}

/*
 * Store the rows of pPage in the external buffer (sorted, see
 * tscFlushTmpBufferImpl) and then flush that buffer.
 *
 * Returns 0 on success, otherwise the non-zero code of the step that failed;
 * the flush is skipped when storing the rows fails.
 */
int32_t tscFlushTmpBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, int32_t orderType) {
  int32_t code = tscFlushTmpBufferImpl(pMemoryBuf, pDesc, pPage, orderType);
  if (code == 0) {
    code = tExtMemBufferFlush(pMemoryBuf);
  }

  return code;
}

/*
 * Append numOfRows rows from data to pPage, flushing the page through
 * tscFlushTmpBuffer every time it reaches the column model's capacity.
 *
 * Returns 0 on success or the first non-zero code reported by a flush.
 */
int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, void *data,
                     int32_t numOfRows, int32_t orderType) {
  SColumnModel *pModel = pDesc->pColumnModel;

  // fast path: everything fits into the free part of the page
  if (pPage->num + numOfRows <= pModel->capacity) {
    tColModelAppend(pModel, pPage, data, 0, numOfRows, numOfRows);
    return 0;
  }

  // fill up the page with the leading rows; `consumed` counts the rows taken from data so far
  int32_t consumed = pModel->capacity - pPage->num;
  tColModelAppend(pModel, pPage, data, 0, consumed, numOfRows);

  // the page is full now and has to go to the external buffer
  assert(pPage->num == pModel->capacity);
  int32_t code = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType);
  if (code != 0) {
    return code;
  }

  int32_t remain = numOfRows - consumed;
  while (remain > 0) {
    // at most one full page per round
    int32_t chunk = (remain > pModel->capacity) ? pModel->capacity : remain;

    tColModelAppend(pModel, pPage, data, consumed, chunk, numOfRows);

    if (pPage->num == pModel->capacity) {
      if ((code = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType)) != TSDB_CODE_SUCCESS) {
        return code;
      }
    } else {
      // last, partially filled page stays in memory
      pPage->num = chunk;
    }

    remain -= chunk;
    consumed += chunk;
  }

  return 0;
}

/*
 * Detach the local reducer from pSql->res and release everything it owns,
 * including the merge environment (external buffers, order descriptor and
 * result column model).
 *
 * The reducer pointer is taken with an atomic exchange, so only the caller that
 * obtains the non-NULL pointer performs the release. Before releasing, the
 * function waits (polling every 100ms) while the reducer status is
 * TSC_LOCALREDUCE_IN_PROGRESS.
 */
void tscDestroyLocalReducer(SSqlObj *pSql) {
  if (pSql == NULL) {
    return;
  }

  tscDebug("%p start to free local reducer", pSql);
  SSqlRes *pRes = &(pSql->res);
  if (pRes->pLocalReducer == NULL) {
    tscDebug("%p local reducer has been freed, abort", pSql);
    return;
  }

  SSqlCmd *   pCmd = &pSql->cmd;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // there is no more result, so we release all allocated resource
  SLocalReducer *pLocalReducer = (SLocalReducer *)atomic_exchange_ptr(&pRes->pLocalReducer, NULL);
  if (pLocalReducer == NULL) {
    tscDebug("%p already freed or another free function is invoked", pSql);
    tscDebug("%p free local reducer finished", pSql);
    return;
  }

  // try READY -> TOBE_FREED; retry for as long as the reducer is reported to be in progress
  for (;;) {
    int32_t status = atomic_val_compare_exchange_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY,
                                                    TSC_LOCALREDUCE_TOBE_FREED);
    if (status != TSC_LOCALREDUCE_IN_PROGRESS) {
      break;
    }

    taosMsleep(100);
    tscDebug("%p waiting for delete procedure, status: %d", pSql, status);
  }

  pLocalReducer->pFillInfo = taosDestoryFillInfo(pLocalReducer->pFillInfo);

  // per-output function contexts
  if (pLocalReducer->pCtx != NULL) {
    for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[i];

      tVariantDestroy(&pCtx->tag);
      tfree(pCtx->tagInfo.pTagCtxList);
    }

    tfree(pLocalReducer->pCtx);
  }

  tfree(pLocalReducer->prevRowOfInput);

  tfree(pLocalReducer->pTempBuffer);
  tfree(pLocalReducer->pResultBuf);

  // per-output intermediate result buffers
  if (pLocalReducer->pResInfo != NULL) {
    for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
      tfree(pLocalReducer->pResInfo[i].interResultBuf);
    }

    tfree(pLocalReducer->pResInfo);
  }

  // the comparator parameter is owned by the loser tree
  if (pLocalReducer->pLoserTree) {
    tfree(pLocalReducer->pLoserTree->param);
    tfree(pLocalReducer->pLoserTree);
  }

  tfree(pLocalReducer->pFinalRes);
  tfree(pLocalReducer->discardData);

  tscLocalReducerEnvDestroy(pLocalReducer->pExtMemBuffer, pLocalReducer->pDesc, pLocalReducer->resColModel,
                            pLocalReducer->numOfVnode);

  // merge inputs: one entry per loser-tree leaf
  for (int32_t i = 0; i < pLocalReducer->numOfBuffer; ++i) {
    tfree(pLocalReducer->pLocalDataSrc[i]);
  }

  pLocalReducer->numOfBuffer = 0;
  pLocalReducer->numOfCompleted = 0;
  free(pLocalReducer);

  tscDebug("%p free local reducer finished", pSql);
}

H
hjxilinx 已提交
556
/*
 * Create the order descriptor used to sort/merge intermediate results.
 *
 * The order columns are the group-by columns (located at the end of the output
 * fields) plus one extra slot when the query has an interval or is an ordered
 * projection on a super table. For interval queries that slot is set to
 * PRIMARYKEY_TIMESTAMP_COL_INDEX; otherwise it keeps the zero written by calloc.
 *
 * Returns TSDB_CODE_SUCCESS and stores the descriptor in *pOrderDesc, or
 * TSDB_CODE_TSC_OUT_OF_MEMORY when an allocation fails.
 */
static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCmd, SColumnModel *pModel) {
  int32_t     numOfGroupByCols = 0;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) {
    numOfGroupByCols = pQueryInfo->groupbyExpr.numOfGroupCols;
  }

  // primary timestamp column is involved in final result
  if (pQueryInfo->intervalTime != 0 || tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) {
    numOfGroupByCols++;
  }

  // calloc(0, n) is allowed to return NULL; always request at least one slot so
  // that a NULL result unambiguously means "out of memory"
  size_t   numOfSlots = (numOfGroupByCols > 0) ? (size_t)numOfGroupByCols : 1;
  int32_t *orderIdx = (int32_t *)calloc(numOfSlots, sizeof(int32_t));
  if (orderIdx == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

  if (numOfGroupByCols > 0) {
    int32_t startCols = pQueryInfo->fieldsInfo.numOfOutput - pQueryInfo->groupbyExpr.numOfGroupCols;

    // tags value locate at the last columns
    for (int32_t i = 0; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) {
      orderIdx[i] = startCols++;
    }

    if (pQueryInfo->intervalTime != 0) {
      // the first column is the timestamp, handles queries like "interval(10m) group by tags"
      orderIdx[numOfGroupByCols - 1] = PRIMARYKEY_TIMESTAMP_COL_INDEX;
    }
  }

  *pOrderDesc = tOrderDesCreate(orderIdx, numOfGroupByCols, pModel, pQueryInfo->order.order);
  tfree(orderIdx);

  if (*pOrderDesc == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

  return TSDB_CODE_SUCCESS;
}

S
slguan 已提交
598
/*
 * Decide whether the row in tmpBuffer belongs to the same group as the previous
 * input row pPrev.
 *
 * - ordered projection on a super table: always true
 * - plain projection (first function is PRJ/ARITHM): always false, i.e. no merge
 * - no order columns, or the timestamp is the only order column: always true
 * - otherwise: the bytes from the first order column's offset up to the end of
 *   the row are compared.
 */
bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage *tmpBuffer) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // disable merge procedure for column projection query
  int16_t functionId = pReducer->pCtx[0].functionId;
  assert(functionId != TSDB_FUNC_ARITHM);

  if (pReducer->orderPrjOnSTable) {
    return true;
  }

  if (functionId == TSDB_FUNC_PRJ || functionId == TSDB_FUNC_ARITHM) {
    return false;
  }

  tOrderDescriptor *pOrderDesc = pReducer->pDesc;
  SColumnOrderInfo *orderInfo = &pOrderDesc->orderInfo;

  // no group by columns, all data belongs to one group
  int32_t numOfCols = orderInfo->numOfCols;
  if (numOfCols <= 0) {
    return true;
  }

  bool tsIsLastOrderCol = (orderInfo->pData[numOfCols - 1] == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  if (!tsIsLastOrderCol) {
    // simple group by query
    assert(pQueryInfo->intervalTime == 0);
  } else {
    // super table interval query: with the timestamp as the sole order column
    // every result row belongs to one group
    assert(pQueryInfo->intervalTime > 0);
    if (numOfCols == 1) {
      return true;
    }
  }

  // compare the tail of both rows, starting at the first order column
  int32_t firstOrderCol = orderInfo->pData[0];
  int32_t offset = (pOrderDesc->pColumnModel)->pFields[firstOrderCol].offset;

  return memcmp(pPrev + offset, tmpBuffer->data + offset, pOrderDesc->pColumnModel->rowSize - offset) == 0;
}

int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOrderDescriptor **pOrderDesc,
H
hjxilinx 已提交
643
                                 SColumnModel **pFinalModel, uint32_t nBufferSizes) {
H
hzcheng 已提交
644 645 646
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

H
hjxilinx 已提交
647
  SSchema *     pSchema = NULL;
H
hjxilinx 已提交
648
  SColumnModel *pModel = NULL;
H
hzcheng 已提交
649 650
  *pFinalModel = NULL;

H
hjxilinx 已提交
651
  SQueryInfo *    pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
H
hjxilinx 已提交
652
  STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
S
slguan 已提交
653

654
  (*pMemBuffer) = (tExtMemBuffer **)malloc(POINTER_BYTES * pSql->numOfSubs);
H
hzcheng 已提交
655 656
  if (*pMemBuffer == NULL) {
    tscError("%p failed to allocate memory", pSql);
657
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
658 659
    return pRes->code;
  }
H
hjxilinx 已提交
660 661 662 663
  
  size_t size = tscSqlExprNumOfExprs(pQueryInfo);
  
  pSchema = (SSchema *)calloc(1, sizeof(SSchema) * size);
H
hzcheng 已提交
664 665
  if (pSchema == NULL) {
    tscError("%p failed to allocate memory", pSql);
666
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
667 668 669 670
    return pRes->code;
  }

  int32_t rlen = 0;
H
hjxilinx 已提交
671
  for (int32_t i = 0; i < size; ++i) {
672
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
H
hzcheng 已提交
673 674 675 676 677 678 679

    pSchema[i].bytes = pExpr->resBytes;
    pSchema[i].type = pExpr->resType;

    rlen += pExpr->resBytes;
  }

L
lihui 已提交
680
  int32_t capacity = 0;
H
hjxilinx 已提交
681 682 683
  if (rlen != 0) {
    capacity = nBufferSizes / rlen;
  }
H
hjxilinx 已提交
684 685
  
  pModel = createColumnModel(pSchema, size, capacity);
H
hzcheng 已提交
686

H
hjxilinx 已提交
687
  size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups;
688 689 690 691
  for (int32_t i = 0; i < numOfSubs; ++i) {
    (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel);
    (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL;
  }
H
hzcheng 已提交
692 693

  if (createOrderDescriptor(pOrderDesc, pCmd, pModel) != TSDB_CODE_SUCCESS) {
694
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
B
Bomin Zhang 已提交
695
    tfree(pSchema);
H
hzcheng 已提交
696 697 698
    return pRes->code;
  }

H
hjxilinx 已提交
699
  // final result depends on the fields number
H
hjxilinx 已提交
700 701
  memset(pSchema, 0, sizeof(SSchema) * size);
  for (int32_t i = 0; i < size; ++i) {
H
hjxilinx 已提交
702 703
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);

704
    SSchema *p1 = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pExpr->colInfo.colIndex);
H
hjxilinx 已提交
705

706
    int32_t inter = 0;
H
hjxilinx 已提交
707 708
    int16_t type = -1;
    int16_t bytes = 0;
H
hjxilinx 已提交
709 710 711 712 713 714 715 716 717

    //    if ((pExpr->functionId >= TSDB_FUNC_FIRST_DST && pExpr->functionId <= TSDB_FUNC_LAST_DST) ||
    //        (pExpr->functionId >= TSDB_FUNC_SUM && pExpr->functionId <= TSDB_FUNC_MAX) ||
    //        pExpr->functionId == TSDB_FUNC_LAST_ROW) {
    // the final result size and type in the same as query on single table.
    // so here, set the flag to be false;

    int32_t functionId = pExpr->functionId;
    if (functionId >= TSDB_FUNC_TS && functionId <= TSDB_FUNC_DIFF) {
H
hjxilinx 已提交
718 719
      type = pModel->pFields[i].field.type;
      bytes = pModel->pFields[i].field.bytes;
H
hjxilinx 已提交
720 721 722 723 724 725 726 727
    } else {
      if (functionId == TSDB_FUNC_FIRST_DST) {
        functionId = TSDB_FUNC_FIRST;
      } else if (functionId == TSDB_FUNC_LAST_DST) {
        functionId = TSDB_FUNC_LAST;
      }

      getResultDataInfo(p1->type, p1->bytes, functionId, 0, &type, &bytes, &inter, 0, false);
H
hjxilinx 已提交
728
    }
H
hzcheng 已提交
729

H
hjxilinx 已提交
730 731 732
    pSchema[i].type = type;
    pSchema[i].bytes = bytes;
    strcpy(pSchema[i].name, pModel->pFields[i].field.name);
H
hzcheng 已提交
733
  }
H
hjxilinx 已提交
734 735
  
  *pFinalModel = createColumnModel(pSchema, size, capacity);
H
hzcheng 已提交
736 737 738 739 740 741 742 743 744 745 746
  tfree(pSchema);

  return TSDB_CODE_SUCCESS;
}

/**
 * @param pMemBuffer
 * @param pDesc
 * @param pFinalModel
 * @param numOfVnodes
 */
H
hjxilinx 已提交
747
void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, SColumnModel *pFinalModel,
H
hzcheng 已提交
748
                               int32_t numOfVnodes) {
H
hjxilinx 已提交
749
  destroyColumnModel(pFinalModel);
H
hzcheng 已提交
750 751
  tOrderDescDestroy(pDesc);
  for (int32_t i = 0; i < numOfVnodes; ++i) {
H
hjxilinx 已提交
752
    pMemBuffer[i] = destoryExtMemBuffer(pMemBuffer[i]);
H
hzcheng 已提交
753 754 755 756 757 758 759 760 761 762 763 764
  }

  tfree(pMemBuffer);
}

/**
 *
 * @param pLocalReducer
 * @param pOneInterDataSrc
 * @param treeList
 * @return the number of remain input source. if ret == 0, all data has been handled
 */
S
slguan 已提交
765
int32_t loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc,
H
hzcheng 已提交
766 767 768 769 770 771 772 773 774 775 776
                               bool *needAdjustLoserTree) {
  pOneInterDataSrc->rowIdx = 0;
  pOneInterDataSrc->pageId += 1;

  if (pOneInterDataSrc->pageId <
      pOneInterDataSrc->pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[pOneInterDataSrc->flushoutIdx].numOfPages) {
    tExtMemBufferLoadData(pOneInterDataSrc->pMemBuffer, &(pOneInterDataSrc->filePage), pOneInterDataSrc->flushoutIdx,
                          pOneInterDataSrc->pageId);

#if defined(_DEBUG_VIEW)
    printf("new page load to buffer\n");
H
hjxilinx 已提交
777
    tColModelDisplay(pOneInterDataSrc->pMemBuffer->pColumnModel, pOneInterDataSrc->filePage.data,
778
                     pOneInterDataSrc->filePage.num, pOneInterDataSrc->pMemBuffer->pColumnModel->capacity);
H
hzcheng 已提交
779 780 781 782 783 784 785 786 787 788 789 790 791
#endif
    *needAdjustLoserTree = true;
  } else {
    pLocalReducer->numOfCompleted += 1;

    pOneInterDataSrc->rowIdx = -1;
    pOneInterDataSrc->pageId = -1;
    *needAdjustLoserTree = true;
  }

  return pLocalReducer->numOfBuffer;
}

S
slguan 已提交
792 793
/**
 * Refresh the loser tree after a row of pOneInterDataSrc has been consumed.
 *
 * When the row cursor has moved past the rows held in the in-memory page, the next page of the
 * source is loaded first. Afterwards the leaf that belongs to the current winner
 * (pTree->pNode[0].index) is re-adjusted.
 *
 * @param pLocalReducer     local reducer that owns the data sources
 * @param pOneInterDataSrc  data source whose current row was just consumed
 * @param pTree             loser tree built over the data sources
 */
void adjustLoserTreeFromNewData(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc,
                                SLoserTreeInfo *pTree) {
  bool needToAdjust = true;

  // the in-memory page is used up: fetch the following page of this source
  if (pOneInterDataSrc->rowIdx >= pOneInterDataSrc->filePage.num) {
    loadNewDataFromDiskFor(pLocalReducer, pOneInterDataSrc, &needToAdjust);
  }

  if (!needToAdjust) {
    return;
  }

  // leaves are addressed after the numOfBuffer internal nodes of the tree
  int32_t leafNodeIdx = pTree->pNode[0].index + pLocalReducer->numOfBuffer;

#ifdef _DEBUG_VIEW
  printf("before adjust:\t");
  tLoserTreeDisplay(pTree);
#endif

  tLoserTreeAdjust(pTree, leafNodeIdx);

#ifdef _DEBUG_VIEW
  printf("\nafter adjust:\t");
  tLoserTreeDisplay(pTree);
  printf("\n");
#endif
}

H
Haojun Liao 已提交
825
/**
 * Switch the reducer into "discard" mode and remember the previous input row in
 * pLocalReducer->discardData, so that the remaining rows of the same group can be recognised
 * and dropped. When fill information is supplied, it is reset to the revised start time of the
 * query window.
 *
 * @param pLocalReducer  local reducer to update
 * @param pQueryInfo     query information (time window, sliding time, table meta)
 * @param pFillInfo      fill information to reset; may be NULL
 */
void savePrevRecordAndSetupInterpoInfo(SLocalReducer *pLocalReducer, SQueryInfo *pQueryInfo, SFillInfo *pFillInfo) {
  STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
  STableComInfo   tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);

  if (pFillInfo != NULL) {
    // the smaller of the two window keys is the start, whatever the scan order is
    int64_t stime = pQueryInfo->window.ekey;
    if (pQueryInfo->window.skey < pQueryInfo->window.ekey) {
      stime = pQueryInfo->window.skey;
    }

    int64_t revisedSTime =
        taosGetIntervalStartTimestamp(stime, pQueryInfo->slidingTime, pQueryInfo->slidingTimeUnit, tinfo.precision);

    taosResetFillInfo(pFillInfo, revisedSTime);
  }

  pLocalReducer->discard = true;
  pLocalReducer->discardData->num = 0;

  // keep a copy of the previous input row as the reference row of the group being discarded
  SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel;
  tColModelAppend(pModel, pLocalReducer->discardData, pLocalReducer->prevRowOfInput, 0, 1, 1);
}

// todo merge with following function
H
hjxilinx 已提交
847
// static void reversedCopyResultToDstBuf(SQueryInfo* pQueryInfo, SSqlRes *pRes, tFilePage *pFinalDataPage) {
H
hjxilinx 已提交
848
//
H
hjxilinx 已提交
849
//  for (int32_t i = 0; i < pQueryInfo->exprList.numOfExprs; ++i) {
H
hjxilinx 已提交
850
//    TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
851 852 853 854 855 856 857 858 859 860 861 862
//
//    int32_t offset = tscFieldInfoGetOffset(pQueryInfo, i);
//    char *  src = pFinalDataPage->data + (pRes->numOfRows - 1) * pField->bytes + pRes->numOfRows * offset;
//    char *  dst = pRes->data + pRes->numOfRows * offset;
//
//    for (int32_t j = 0; j < pRes->numOfRows; ++j) {
//      memcpy(dst, src, (size_t)pField->bytes);
//      dst += pField->bytes;
//      src -= pField->bytes;
//    }
//  }
//}
H
hzcheng 已提交
863

H
hjxilinx 已提交
864 865
/**
 * Copy every output column from the per-column fill pages into pRes->data with the row order
 * reversed.
 *
 * The function starts with assert(0), so in builds with assertions enabled it aborts as soon as
 * it is reached.
 *
 * @param pQueryInfo     query information providing the output fields and their offsets
 * @param pRes           result object; pRes->data receives pRes->numOfRows rows per column
 * @param pResPages      one page per output column holding the rows to be copied
 * @param pLocalReducer  local reducer; its result column model is used to cross-check offsets
 */
static void reversedCopyFromInterpolationToDstBuf(SQueryInfo *pQueryInfo, SSqlRes *pRes, tFilePage **pResPages,
                                                  SLocalReducer *pLocalReducer) {
  assert(0);

  size_t size = tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t i = 0; i < size; ++i) {
    TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);

    int32_t offset = tscFieldInfoGetOffset(pQueryInfo, i);
    assert(offset == getColumnModelOffset(pLocalReducer->resColModel, i));

    // read from the last row of the column backwards, write from the first row forwards
    char *from = pResPages[i]->data + (pRes->numOfRows - 1) * pField->bytes;
    char *to = pRes->data + pRes->numOfRows * offset;

    int32_t copied = 0;
    while (copied < pRes->numOfRows) {
      memcpy(to, from, (size_t)pField->bytes);

      to += pField->bytes;
      from -= pField->bytes;
      copied += 1;
    }
  }
}

/*
 * Note: pRes->pLocalReducer may be null, due to the fact that "tscDestroyLocalReducer" is called
 * by "interuptHandler" function in shell
 */
H
Haojun Liao 已提交
890
static void doFillResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool doneOutput) {
H
hjxilinx 已提交
891 892
  SSqlCmd *   pCmd = &pSql->cmd;
  SSqlRes *   pRes = &pSql->res;
893
  
H
hjxilinx 已提交
894 895 896
  tFilePage * pFinalDataPage = pLocalReducer->pResultBuf;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

H
Haojun Liao 已提交
897 898 899 900 901 902 903 904
//  if (pRes->pLocalReducer != pLocalReducer) {
//    /*
//     * Release the SSqlObj is called, and it is int destroying function invoked by other thread.
//     * However, the other thread will WAIT until current process fully completes.
//     * Since the flag of release struct is set by doLocalReduce function
//     */
//    assert(pRes->pLocalReducer == NULL);
//  }
H
hzcheng 已提交
905

H
Haojun Liao 已提交
906
  // no interval query, no fill operation
907
  if (pQueryInfo->intervalTime == 0 || pQueryInfo->fillType == TSDB_FILL_NONE) {
H
hzcheng 已提交
908
    pRes->data = pLocalReducer->pFinalRes;
909
    pRes->numOfRows = pFinalDataPage->num;
H
Haojun Liao 已提交
910
    pRes->numOfClauseTotal += pRes->numOfRows;
H
hzcheng 已提交
911

912 913
    if (pQueryInfo->limit.offset > 0) {
      if (pQueryInfo->limit.offset < pRes->numOfRows) {
914
        int32_t prevSize = pFinalDataPage->num;
915
        tColModelErase(pLocalReducer->resColModel, pFinalDataPage, prevSize, 0, pQueryInfo->limit.offset - 1);
H
hzcheng 已提交
916 917

        /* remove the hole in column model */
S
slguan 已提交
918
        tColModelCompact(pLocalReducer->resColModel, pFinalDataPage, prevSize);
H
hzcheng 已提交
919

920
        pRes->numOfRows -= pQueryInfo->limit.offset;
H
Haojun Liao 已提交
921
        pRes->numOfClauseTotal -= pQueryInfo->limit.offset;
922
        pQueryInfo->limit.offset = 0;
H
hzcheng 已提交
923
      } else {
924
        pQueryInfo->limit.offset -= pRes->numOfRows;
H
hzcheng 已提交
925
        pRes->numOfRows = 0;
H
Haojun Liao 已提交
926
        pRes->numOfClauseTotal = 0;
H
hzcheng 已提交
927 928 929
      }
    }

H
Haojun Liao 已提交
930
    if (pQueryInfo->limit.limit >= 0 && pRes->numOfClauseTotal > pQueryInfo->limit.limit) {
H
hzcheng 已提交
931
      /* impose the limitation of output rows on the final result */
932
      int32_t prevSize = pFinalDataPage->num;
H
Haojun Liao 已提交
933 934
      int32_t overflow = pRes->numOfClauseTotal - pQueryInfo->limit.limit;
      assert(overflow < pRes->numOfRows);
H
hzcheng 已提交
935

H
Haojun Liao 已提交
936
      pRes->numOfClauseTotal = pQueryInfo->limit.limit;
H
Haojun Liao 已提交
937 938
      pRes->numOfRows -= overflow;
      pFinalDataPage->num -= overflow;
H
hzcheng 已提交
939

S
slguan 已提交
940
      tColModelCompact(pLocalReducer->resColModel, pFinalDataPage, prevSize);
H
hzcheng 已提交
941 942

      /* set remain data to be discarded, and reset the interpolation information */
943
      savePrevRecordAndSetupInterpoInfo(pLocalReducer, pQueryInfo, pLocalReducer->pFillInfo);
H
hzcheng 已提交
944 945
    }

H
Haojun Liao 已提交
946
    memcpy(pRes->data, pFinalDataPage->data, pRes->numOfRows * pLocalReducer->finalRowSize);
947
    pFinalDataPage->num = 0;
H
hzcheng 已提交
948 949 950
    return;
  }

951 952
  SFillInfo *pFillInfo = pLocalReducer->pFillInfo;
  int64_t actualETime = MAX(pQueryInfo->window.skey, pQueryInfo->window.ekey);
H
hzcheng 已提交
953

H
hjxilinx 已提交
954 955 956
  tFilePage **pResPages = malloc(POINTER_BYTES * pQueryInfo->fieldsInfo.numOfOutput);
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
    TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
957
    pResPages[i] = calloc(1, sizeof(tFilePage) + pField->bytes * pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
958
  }
H
hjxilinx 已提交
959
  
H
hzcheng 已提交
960
  while (1) {
961
    int64_t newRows = taosGenerateDataBlock(pFillInfo, pResPages, pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
962

963 964
    if (pQueryInfo->limit.offset < newRows) {
      newRows -= pQueryInfo->limit.offset;
H
hzcheng 已提交
965

966
      if (pQueryInfo->limit.offset > 0) {
H
hjxilinx 已提交
967 968
        for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
          TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
969 970
          memmove(pResPages[i]->data, pResPages[i]->data + pField->bytes * pQueryInfo->limit.offset,
                  newRows * pField->bytes);
H
hzcheng 已提交
971 972 973 974 975
        }
      }

      pRes->data = pLocalReducer->pFinalRes;
      pRes->numOfRows = newRows;
H
Haojun Liao 已提交
976
      pRes->numOfClauseTotal += newRows;
H
hzcheng 已提交
977

978
      pQueryInfo->limit.offset = 0;
H
hzcheng 已提交
979 980
      break;
    } else {
981
      pQueryInfo->limit.offset -= newRows;
H
hzcheng 已提交
982 983
      pRes->numOfRows = 0;

984
      int32_t rpoints = taosNumOfRemainRows(pFillInfo);
H
hzcheng 已提交
985
      if (rpoints <= 0) {
986
        if (!doneOutput) { // reduce procedure has not completed yet, but current results for fill are exhausted
H
hzcheng 已提交
987 988 989 990
          break;
        }

        /* all output for current group are completed */
H
Haojun Liao 已提交
991
        int32_t totalRemainRows = getFilledNumOfRes(pFillInfo, actualETime, pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
992 993 994 995 996 997 998 999
        if (totalRemainRows <= 0) {
          break;
        }
      }
    }
  }

  if (pRes->numOfRows > 0) {
H
Haojun Liao 已提交
1000
    if (pQueryInfo->limit.limit >= 0 && pRes->numOfClauseTotal > pQueryInfo->limit.limit) {
H
Haojun Liao 已提交
1001 1002
      int32_t overflow = pRes->numOfClauseTotal - pQueryInfo->limit.limit;
      pRes->numOfRows -= overflow;
H
hzcheng 已提交
1003 1004 1005

      assert(pRes->numOfRows >= 0);

H
Haojun Liao 已提交
1006
      pRes->numOfClauseTotal = pQueryInfo->limit.limit;
H
Haojun Liao 已提交
1007
      pFinalDataPage->num -= overflow;
H
hzcheng 已提交
1008 1009

      /* set remain data to be discarded, and reset the interpolation information */
1010
      savePrevRecordAndSetupInterpoInfo(pLocalReducer, pQueryInfo, pFillInfo);
H
hzcheng 已提交
1011 1012
    }

1013
    if (pQueryInfo->order.order == TSDB_ORDER_ASC) {
H
hjxilinx 已提交
1014 1015
      for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
        TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
1016
        int16_t     offset = getColumnModelOffset(pLocalReducer->resColModel, i);
H
hjxilinx 已提交
1017
        memcpy(pRes->data + offset * pRes->numOfRows, pResPages[i]->data, pField->bytes * pRes->numOfRows);
H
hzcheng 已提交
1018
      }
H
hjxilinx 已提交
1019
    } else {  // todo bug??
1020
      reversedCopyFromInterpolationToDstBuf(pQueryInfo, pRes, pResPages, pLocalReducer);
H
hzcheng 已提交
1021 1022 1023
    }
  }

1024
  pFinalDataPage->num = 0;
H
hjxilinx 已提交
1025
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
H
hzcheng 已提交
1026 1027
    tfree(pResPages[i]);
  }
1028
  
H
hzcheng 已提交
1029 1030 1031
  tfree(pResPages);
}

S
slguan 已提交
1032
/**
 * Copy the single row held in tmpBuffer into pLocalReducer->prevRowOfInput, column by column,
 * then empty tmpBuffer and flag that a previous row is available.
 *
 * @param pLocalReducer  local reducer receiving the row
 * @param tmpBuffer      page that must contain exactly one row
 */
static void savePreviousRow(SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  SColumnModel *pColumnModel = pLocalReducer->pDesc->pColumnModel;
  assert(pColumnModel->capacity == 1 && tmpBuffer->num == 1);

  int32_t col = 0;
  while (col < pColumnModel->numOfCols) {
    SSchema *pColSchema = getColumnModelSchema(pColumnModel, col);
    int16_t  colOffset = getColumnModelOffset(pColumnModel, col);

    // both buffers use the column offsets of the same model
    memcpy(pLocalReducer->prevRowOfInput + colOffset, tmpBuffer->data + colOffset, pColSchema->bytes);
    col += 1;
  }

  tmpBuffer->num = 0;
  pLocalReducer->hasPrevRow = true;
}

H
hjxilinx 已提交
1048
/**
 * Run the secondary (client side) merge step of every expression of the current clause.
 *
 * First pass, per function context: refresh the tag variant from the input buffer for
 * tag/tag_dummy/ts_dummy functions, copy the first parameter for top/bottom functions, mark the
 * context as being in the secondary merge stage and, when requested, call the function's init.
 * Second pass: invoke distSecondaryMergeFunc for every function except tag_dummy and ts_dummy.
 *
 * @param pCmd           sql command providing the query information of the current clause
 * @param pLocalReducer  local reducer that owns the function contexts
 * @param needInit       when true, each function's init callback is invoked in the first pass
 */
static void doExecuteSecondaryMerge(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, bool needInit) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t      size = tscSqlExprNumOfExprs(pQueryInfo);

  // the tag columns need to be set before all functions execution
  for (int32_t j = 0; j < size; ++j) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[j];
    int32_t         functionId = pCtx->functionId;

    bool tagFromInput =
        (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TS_DUMMY);

    if (tagFromInput) {
      // tags/tags_dummy function, the tag field of SQLFunctionCtx is from the input buffer
      tVariantDestroy(&pCtx->tag);
      char *input = pCtx->aInputElemBuf;

      bool isVarLen = (pCtx->inputType == TSDB_DATA_TYPE_BINARY || pCtx->inputType == TSDB_DATA_TYPE_NCHAR);
      if (!isVarLen) {
        tVariantCreateFromBinary(&pCtx->tag, input, pCtx->inputBytes, pCtx->inputType);
      } else {
        assert(varDataLen(input) <= pCtx->inputBytes);
        tVariantCreateFromBinary(&pCtx->tag, varDataVal(input), varDataLen(input), pCtx->inputType);
      }
    } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, j);
      pCtx->param[0].i64Key = pExpr->param[0].i64Key;
    }

    pCtx->currentStage = SECONDARY_STAGE_MERGE;

    if (needInit) {
      aAggs[pCtx->functionId].init(pCtx);
    }
  }

  for (int32_t j = 0; j < size; ++j) {
    int32_t functionId = pLocalReducer->pCtx[j].functionId;

    // the dummy functions take no part in the merge step
    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TS_DUMMY) {
      aAggs[functionId].distSecondaryMergeFunc(&pLocalReducer->pCtx[j]);
    }
  }
}
H
hzcheng 已提交
1089

H
hjxilinx 已提交
1090
/**
 * If a row is still pending from the previous call (hasUnprocessedRow), clear the flag, merge
 * that row with freshly initialised functions and store it as the previous row.
 *
 * @param pCmd           sql command of the current query
 * @param pLocalReducer  local reducer holding the pending-row flag
 * @param tmpBuffer      page that contains the pending row
 */
static void handleUnprocessedRow(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  if (!pLocalReducer->hasUnprocessedRow) {
    return;
  }

  pLocalReducer->hasUnprocessedRow = false;

  doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
  savePreviousRow(pLocalReducer, tmpBuffer);
}

1098
/**
 * Return the largest numOfRes among the result infos of all functions of the query, ignoring
 * the ts, tag and tagprj functions.
 *
 * @param pQueryInfo  query information; supplies the number of expressions
 * @param pCtx        array of function contexts, one per expression
 * @return the maximum number of results, 0 when no function contributes
 */
static int64_t getNumOfResultLocal(SQueryInfo *pQueryInfo, SQLFunctionCtx *pCtx) {
  int64_t maxOutput = 0;
  size_t  size = tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t j = 0; j < size; ++j) {
    int32_t functionId = pCtx[j].functionId;

    // ts, tag and tagprj follow the main output, they cannot decide the number of rows
    bool followsMainOutput =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);

    if (!followsMainOutput) {
      SResultInfo *pResInfo = GET_RES_INFO(&pCtx[j]);
      if (maxOutput < pResInfo->numOfRes) {
        maxOutput = pResInfo->numOfRes;
      }
    }
  }

  return maxOutput;
}

/*
S
slguan 已提交
1122
 * in handling the top/bottom query, which produce more than one rows result,
H
hzcheng 已提交
1123 1124
 * the tsdb_func_tags only fill the first row of results, the remain rows need to
 * filled with the same result, which is the tags, specified in group by clause
S
slguan 已提交
1125
 *
H
hzcheng 已提交
1126
 */
H
hjxilinx 已提交
1127
static void fillMultiRowsOfTagsVal(SQueryInfo *pQueryInfo, int32_t numOfRes, SLocalReducer *pLocalReducer) {
S
slguan 已提交
1128
  int32_t maxBufSize = 0;  // find the max tags column length to prepare the buffer
H
hjxilinx 已提交
1129 1130 1131
  size_t size = tscSqlExprNumOfExprs(pQueryInfo);
  
  for (int32_t k = 0; k < size; ++k) {
1132
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, k);
S
slguan 已提交
1133
    if (maxBufSize < pExpr->resBytes && pExpr->functionId == TSDB_FUNC_TAG) {
H
hzcheng 已提交
1134 1135 1136 1137 1138 1139
      maxBufSize = pExpr->resBytes;
    }
  }

  assert(maxBufSize >= 0);

H
hjxilinx 已提交
1140
  char *buf = malloc((size_t)maxBufSize);
H
hjxilinx 已提交
1141
  for (int32_t k = 0; k < size; ++k) {
H
Haojun Liao 已提交
1142 1143
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[k];
    if (pCtx->functionId != TSDB_FUNC_TAG) {
S
slguan 已提交
1144 1145 1146
      continue;
    }

H
hzcheng 已提交
1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159
    int32_t inc = numOfRes - 1;  // tsdb_func_tag function only produce one row of result
    memset(buf, 0, (size_t)maxBufSize);
    memcpy(buf, pCtx->aOutputBuf, (size_t)pCtx->outputBytes);

    for (int32_t i = 0; i < inc; ++i) {
      pCtx->aOutputBuf += pCtx->outputBytes;
      memcpy(pCtx->aOutputBuf, buf, (size_t)pCtx->outputBytes);
    }
  }

  free(buf);
}

H
hjxilinx 已提交
1160
/**
 * Finalize every function of the current group and account for the rows it produced.
 *
 * After calling xFinalize on each context, the "previous row" flag is cleared, the number of
 * rows of the group is added to the result buffer and the tag values are replicated to all of
 * those rows.
 *
 * @param pQueryInfo     query information of the current clause
 * @param pLocalReducer  local reducer that owns the function contexts and the result buffer
 * @return number of result rows generated for the group
 */
int32_t finalizeRes(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer) {
  size_t size = tscSqlExprNumOfExprs(pQueryInfo);

  int32_t k = 0;
  while (k < size) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[k];
    aAggs[pCtx->functionId].xFinalize(pCtx);
    k += 1;
  }

  pLocalReducer->hasPrevRow = false;

  int32_t numOfRes = (int32_t)getNumOfResultLocal(pQueryInfo, pLocalReducer->pCtx);
  pLocalReducer->pResultBuf->num += numOfRes;

  fillMultiRowsOfTagsVal(pQueryInfo, numOfRes, pLocalReducer);

  return numOfRes;
}

/*
 * points merge:
 * points are merged according to the sort info, which is tags columns and timestamp column.
 * In case of points without either tags columns or timestamp, such as
 * results generated by simple aggregation function, we merge them all into one points
 * *Exception*: column projection query, required no merge procedure
 */
H
hjxilinx 已提交
1184
bool needToMerge(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
H
hzcheng 已提交
1185
  int32_t ret = 0;  // merge all result by default
1186

H
Haojun Liao 已提交
1187
  int16_t functionId = pLocalReducer->pCtx[0].functionId;
1188 1189 1190

  // todo opt performance
  if ((/*functionId == TSDB_FUNC_PRJ || */functionId == TSDB_FUNC_ARITHM) || (tscIsProjectionQueryOnSTable(pQueryInfo, 0))) {  // column projection query
H
hzcheng 已提交
1191 1192 1193
    ret = 1;                                                            // disable merge procedure
  } else {
    tOrderDescriptor *pDesc = pLocalReducer->pDesc;
H
Haojun Liao 已提交
1194
    if (pDesc->orderInfo.numOfCols > 0) {
1195
      if (pDesc->tsOrder == TSDB_ORDER_ASC) {  // asc
H
hzcheng 已提交
1196
        // todo refactor comparator
S
slguan 已提交
1197
        ret = compare_a(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data);
H
hzcheng 已提交
1198
      } else {  // desc
S
slguan 已提交
1199
        ret = compare_d(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data);
H
hzcheng 已提交
1200 1201 1202 1203 1204 1205 1206 1207
      }
    }
  }

  /* if ret == 0, means the result belongs to the same group */
  return (ret == 0);
}

H
hjxilinx 已提交
1208
/**
 * Check whether the number of groups already returned has reached the group limit (slimit) of
 * the query. A negative slimit.limit never counts as reached.
 */
static bool reachGroupResultLimit(SQueryInfo *pQueryInfo, SSqlRes *pRes) {
  if (pQueryInfo->slimit.limit < 0) {
    return false;
  }

  return pRes->numOfGroups >= pQueryInfo->slimit.limit;
}

/**
 * Count one more finished group in pSql->res.
 *
 * @param pSql  sql object whose result is updated
 * @return true when, after counting this group, the group limit (slimit) of the query is reached
 */
static bool saveGroupResultInfo(SSqlObj *pSql) {
  SSqlRes *pRes = &pSql->res;
  SSqlCmd *pCmd = &pSql->cmd;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  pRes->numOfGroups += 1;

  // the output group is limited by the slimit clause
  return reachGroupResultLimit(pQueryInfo, pRes);
}

S
slguan 已提交
1231 1232 1233 1234 1235 1236 1237 1238
/**
 * Produce the output of the group that has just been merged.
 *
 * While a group offset (slimit.offset) is still pending, the group is skipped: no rows are
 * returned, the offset is decremented and the rest of the group is flagged for discarding unless
 * it is already complete. Otherwise the result buffer is compacted, handed to the fill
 * information (if any) and passed on to doFillResult.
 *
 * @param pSql                   sql object of the query
 * @param pLocalReducer          local reducer holding the merged rows
 * @param noMoreCurrentGroupRes  true when the current group has no further input rows
 * @return if current group is skipped, return false, and do NOT record it into pRes->numOfGroups
 */
bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool noMoreCurrentGroupRes) {
  SSqlRes *pRes = &pSql->res;
  SSqlCmd *pCmd = &pSql->cmd;

  SQueryInfo *  pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  SColumnModel *pModel = pLocalReducer->resColModel;
  tFilePage *   pResBuf = pLocalReducer->pResultBuf;

  pRes->code = TSDB_CODE_SUCCESS;

  // this group is skipped by the user: return nothing and drop what may follow
  if (pQueryInfo->slimit.offset > 0) {
    pRes->numOfRows = 0;
    pQueryInfo->slimit.offset -= 1;
    pLocalReducer->discard = !noMoreCurrentGroupRes;

    return false;
  }

  tColModelCompact(pModel, pResBuf, pModel->capacity);

#ifdef _DEBUG_VIEW
  printf("final result before interpo:\n");
#endif

  SFillInfo *pFillInfo = pLocalReducer->pFillInfo;
  if (pFillInfo != NULL) {
    taosFillSetStartInfo(pFillInfo, pResBuf->num, pQueryInfo->window.ekey);
    taosFillCopyInputDataFromOneFilePage(pFillInfo, pResBuf);
  }

  doFillResult(pSql, pLocalReducer, noMoreCurrentGroupRes);
  return true;
}

H
hjxilinx 已提交
1277
/**
 * Reset the output buffer to the beginning: point the output position of every function context
 * at the start of its column inside the result buffer, then zero the result buffer (header plus
 * nResultBufSize bytes).
 *
 * @param pQueryInfo     query information providing the output fields and their offsets
 * @param pLocalReducer  local reducer that owns the contexts and the result buffer
 */
void resetOutputBuf(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer) {
  int32_t col = 0;
  while (col < pQueryInfo->fieldsInfo.numOfOutput) {
    // each column occupies `capacity` consecutive values, starting at offset * capacity
    pLocalReducer->pCtx[col].aOutputBuf =
        pLocalReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, col) * pLocalReducer->resColModel->capacity;
    col += 1;
  }

  memset(pLocalReducer->pResultBuf, 0, pLocalReducer->nResultBufSize + sizeof(tFilePage));
}

S
slguan 已提交
1286
/**
 * Prepare the result object and the query for the rows of a new group: the row counters of pRes
 * are cleared, the row offset of the query is restored from pLocalReducer->offset and, when a
 * fill policy is active, the fill information is reset to the start of the query window.
 *
 * @param pRes           result object to reset
 * @param pCmd           sql command providing the query and table meta information
 * @param pLocalReducer  local reducer holding the original offset and the fill information
 */
static void resetEnvForNewResultset(SSqlRes *pRes, SSqlCmd *pCmd, SLocalReducer *pLocalReducer) {
  pRes->numOfRows = 0;
  pRes->numOfClauseTotal = 0;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // every group starts again with the full row offset
  pQueryInfo->limit.offset = pLocalReducer->offset;

  STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0);
  STableComInfo   tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);

  int8_t precision = tinfo.precision;

  if (pQueryInfo->fillType == TSDB_FILL_NONE) {
    return;
  }

  // for group result interpolation, do not return if not data is generated
  TSKEY skey = pQueryInfo->window.skey;
  if (pQueryInfo->window.ekey < skey) {
    skey = pQueryInfo->window.ekey;
  }

  // NOTE(review): savePrevRecordAndSetupInterpoInfo passes pQueryInfo->slidingTime as the second
  // argument of taosGetIntervalStartTimestamp, here intervalTime is passed together with
  // slidingTimeUnit - confirm which of the two is intended.
  int64_t newTime = taosGetIntervalStartTimestamp(skey, pQueryInfo->intervalTime, pQueryInfo->slidingTimeUnit, precision);

  taosResetFillInfo(pLocalReducer->pFillInfo, newTime);
}

S
slguan 已提交
1311 1312 1313 1314
/** True when the number of completed data sources equals the number of buffers. */
static bool isAllSourcesCompleted(SLocalReducer *pLocalReducer) {
  return pLocalReducer->numOfCompleted == pLocalReducer->numOfBuffer;
}

1315
/**
 * Continue emitting filled rows for the current group when the fill information still reports
 * remaining rows.
 *
 * @param pSql  sql object of the query
 * @return true if the fill information had remaining rows (whether or not rows were actually
 *         produced by this call), false if there is no fill information or nothing remains
 */
static bool doBuildFilledResultForGroup(SSqlObj *pSql) {
  SSqlRes *pRes = &pSql->res;
  SSqlCmd *pCmd = &pSql->cmd;

  SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SFillInfo *    pFillInfo = pLocalReducer->pFillInfo;

  if (pFillInfo == NULL || taosNumOfRemainRows(pFillInfo) <= 0) {
    return false;
  }

  assert(pQueryInfo->fillType != TSDB_FILL_NONE);

  // the first column must be the timestamp column: read the timestamp of the last input row
  tFilePage *pFinalDataBuf = pLocalReducer->pResultBuf;
  int64_t    etime = *(int64_t *)(pFinalDataBuf->data + TSDB_KEYSIZE * (pFillInfo->numOfRows - 1));

  int32_t rows = getFilledNumOfRes(pFillInfo, etime, pLocalReducer->resColModel->capacity);
  if (rows > 0) {  // do fill gap
    doFillResult(pSql, pLocalReducer, false);
  }

  return true;
}
H
hzcheng 已提交
1340

S
slguan 已提交
1341 1342 1343 1344
/**
 * Handle the situation in which the input of the current group is finished: either all sources
 * are consumed and no previous row is pending, or the first data source is NULL, or an
 * unprocessed row of the next group is waiting while nothing is being discarded.
 *
 * In that situation, pending filled rows up to the end of the query window are produced (when a
 * fill policy is active); if nothing is returned and input remains, the environment is prepared
 * for the next group.
 *
 * @param pSql  sql object of the query
 * @return true when the caller can return to the client right away (rows were produced, the
 *         query is complete, or the group limit is reached); false when merging has to go on
 */
static bool doHandleLastRemainData(SSqlObj *pSql) {
  SSqlRes *pRes = &pSql->res;
  SSqlCmd *pCmd = &pSql->cmd;

  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SFillInfo *    pFillInfo = pLocalReducer->pFillInfo;

  bool prevGroupCompleted = (!pLocalReducer->discard) && pLocalReducer->hasUnprocessedRow;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  bool groupInputFinished = (isAllSourcesCompleted(pLocalReducer) && !pLocalReducer->hasPrevRow) ||
                            pLocalReducer->pLocalDataSrc[0] == NULL || prevGroupCompleted;
  if (!groupInputFinished) {
    return false;
  }

  // if fillType == TSDB_FILL_NONE, there is nothing to fill
  if (pQueryInfo->fillType != TSDB_FILL_NONE) {
    // the larger of the two window keys is the end, whatever the scan order is
    int64_t etime = pQueryInfo->window.skey;
    if (pQueryInfo->window.skey < pQueryInfo->window.ekey) {
      etime = pQueryInfo->window.ekey;
    }

    assert(pFillInfo->numOfRows == 0);
    int32_t rows = getFilledNumOfRes(pFillInfo, etime, pLocalReducer->resColModel->capacity);
    if (rows > 0) {  // do interpo
      doFillResult(pSql, pLocalReducer, true);
    }
  }

  /*
   * 1. numOfRows == 0, means no interpolation results are generated.
   * 2. if all local data sources are consumed, and no un-processed rows exist.
   *
   * No results will be generated and query completed.
   */
  if (pRes->numOfRows > 0 || (isAllSourcesCompleted(pLocalReducer) && (!pLocalReducer->hasUnprocessedRow))) {
    return true;
  }

  // start to process result for a new group and save the result info of previous group
  if (saveGroupResultInfo(pSql)) {
    return true;
  }

  resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
  return false;
}
H
hzcheng 已提交
1385

H
hjxilinx 已提交
1386 1387 1388 1389
/**
 * Move the output position of every function context past the numOfRes rows that were just
 * produced, then merge the current input row with freshly initialised functions.
 *
 * @param pSql      sql object of the query
 * @param numOfRes  number of rows produced by the window that has just been finalized
 */
static void doProcessResultInNextWindow(SSqlObj *pSql, int32_t numOfRes) {
  SSqlRes *pRes = &pSql->res;
  SSqlCmd *pCmd = &pSql->cmd;

  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t         size = tscSqlExprNumOfExprs(pQueryInfo);

  int32_t k = 0;
  while (k < size) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[k];
    pCtx->aOutputBuf += pCtx->outputBytes * numOfRes;

    // top/bottom keep a separate timestamp output that has to advance by the same number of rows
    bool isTopBottom = (pCtx->functionId == TSDB_FUNC_TOP || pCtx->functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf = ((char *)pCtx->ptsOutputBuf + TSDB_KEYSIZE * numOfRes);
    }

    k += 1;
  }

  doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
}

1407
int32_t tscDoLocalMerge(SSqlObj *pSql) {
S
slguan 已提交
1408 1409
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;
H
hjxilinx 已提交
1410

H
hjxilinx 已提交
1411
  tscResetForNextRetrieve(pRes);
H
hjxilinx 已提交
1412

S
slguan 已提交
1413
  if (pSql->signature != pSql || pRes == NULL || pRes->pLocalReducer == NULL) {  // all data has been processed
1414
    tscDebug("%p %s call the drop local reducer", pSql, __FUNCTION__);
S
slguan 已提交
1415
    tscDestroyLocalReducer(pSql);
H
hzcheng 已提交
1416 1417
    return 0;
  }
H
hjxilinx 已提交
1418

S
slguan 已提交
1419
  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
H
hjxilinx 已提交
1420 1421
  SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

S
slguan 已提交
1422 1423
  // set the data merge in progress
  int32_t prevStatus =
weixin_48148422's avatar
weixin_48148422 已提交
1424
      atomic_val_compare_exchange_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY, TSC_LOCALREDUCE_IN_PROGRESS);
H
hjxilinx 已提交
1425
  if (prevStatus != TSC_LOCALREDUCE_READY) {
H
hjxilinx 已提交
1426
    assert(prevStatus == TSC_LOCALREDUCE_TOBE_FREED);  // it is in tscDestroyLocalReducer function already
S
slguan 已提交
1427 1428 1429 1430 1431 1432 1433 1434 1435 1436
    return TSDB_CODE_SUCCESS;
  }

  tFilePage *tmpBuffer = pLocalReducer->pTempBuffer;

  if (doHandleLastRemainData(pSql)) {
    pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
    return TSDB_CODE_SUCCESS;
  }

1437
  if (doBuildFilledResultForGroup(pSql)) {
S
slguan 已提交
1438 1439 1440 1441
    pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
    return TSDB_CODE_SUCCESS;
  }

H
hzcheng 已提交
1442 1443 1444
  SLoserTreeInfo *pTree = pLocalReducer->pLoserTree;

  // clear buffer
S
slguan 已提交
1445
  handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hjxilinx 已提交
1446
  SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel;
H
hzcheng 已提交
1447 1448

  while (1) {
S
slguan 已提交
1449
    if (isAllSourcesCompleted(pLocalReducer)) {
H
hzcheng 已提交
1450 1451 1452 1453 1454 1455
      break;
    }

#ifdef _DEBUG_VIEW
    printf("chosen data in pTree[0] = %d\n", pTree->pNode[0].index);
#endif
1456
    assert((pTree->pNode[0].index < pLocalReducer->numOfBuffer) && (pTree->pNode[0].index >= 0) && tmpBuffer->num == 0);
H
hzcheng 已提交
1457 1458

    // chosen from loser tree
S
slguan 已提交
1459
    SLocalDataSource *pOneDataSrc = pLocalReducer->pLocalDataSrc[pTree->pNode[0].index];
H
hzcheng 已提交
1460

S
slguan 已提交
1461
    tColModelAppend(pModel, tmpBuffer, pOneDataSrc->filePage.data, pOneDataSrc->rowIdx, 1,
H
hjxilinx 已提交
1462
                    pOneDataSrc->pMemBuffer->pColumnModel->capacity);
H
hzcheng 已提交
1463 1464 1465 1466

#if defined(_DEBUG_VIEW)
    printf("chosen row:\t");
    SSrcColumnInfo colInfo[256] = {0};
1467
    tscGetSrcColumnInfo(colInfo, pQueryInfo);
H
hzcheng 已提交
1468

1469
    tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->num, pModel->capacity, colInfo);
H
hzcheng 已提交
1470
#endif
S
slguan 已提交
1471

H
hzcheng 已提交
1472 1473 1474 1475
    if (pLocalReducer->discard) {
      assert(pLocalReducer->hasUnprocessedRow == false);

      /* current record belongs to the same group of previous record, need to discard it */
S
slguan 已提交
1476
      if (isSameGroup(pCmd, pLocalReducer, pLocalReducer->discardData->data, tmpBuffer)) {
1477
        tmpBuffer->num = 0;
H
hzcheng 已提交
1478 1479
        pOneDataSrc->rowIdx += 1;

S
slguan 已提交
1480 1481 1482 1483
        adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);

        // all inputs are exhausted, abort current process
        if (isAllSourcesCompleted(pLocalReducer)) {
H
hzcheng 已提交
1484 1485 1486
          break;
        }

S
slguan 已提交
1487
        // data belongs to the same group needs to be discarded
H
hzcheng 已提交
1488 1489 1490
        continue;
      } else {
        pLocalReducer->discard = false;
1491
        pLocalReducer->discardData->num = 0;
H
hzcheng 已提交
1492

S
slguan 已提交
1493 1494 1495 1496 1497 1498
        if (saveGroupResultInfo(pSql)) {
          pLocalReducer->status = TSC_LOCALREDUCE_READY;
          return TSDB_CODE_SUCCESS;
        }

        resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
H
hzcheng 已提交
1499 1500 1501 1502
      }
    }

    if (pLocalReducer->hasPrevRow) {
1503
      if (needToMerge(pQueryInfo, pLocalReducer, tmpBuffer)) {
S
slguan 已提交
1504
        // belong to the group of the previous row, continue process it
S
slguan 已提交
1505
        doExecuteSecondaryMerge(pCmd, pLocalReducer, false);
H
hzcheng 已提交
1506 1507

        // copy to buffer
S
slguan 已提交
1508 1509 1510 1511 1512 1513
        savePreviousRow(pLocalReducer, tmpBuffer);
      } else {
        /*
         * current row does not belong to the group of previous row.
         * so the processing of previous group is completed.
         */
1514
        int32_t numOfRes = finalizeRes(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1515

S
slguan 已提交
1516
        bool       sameGroup = isSameGroup(pCmd, pLocalReducer, pLocalReducer->prevRowOfInput, tmpBuffer);
H
hzcheng 已提交
1517 1518 1519
        tFilePage *pResBuf = pLocalReducer->pResultBuf;

        /*
1520
         * if the previous group does NOT generate any result (pResBuf->num == 0),
H
hzcheng 已提交
1521 1522
         * continue to process results instead of returning results.
         */
1523
        if ((!sameGroup && pResBuf->num > 0) || (pResBuf->num == pLocalReducer->resColModel->capacity)) {
H
hzcheng 已提交
1524
          // does not belong to the same group
S
slguan 已提交
1525
          bool notSkipped = doGenerateFinalResults(pSql, pLocalReducer, !sameGroup);
H
hzcheng 已提交
1526

S
slguan 已提交
1527
          // this row needs to be discarded, since it belongs to the previous group
H
hzcheng 已提交
1528 1529
          if (pLocalReducer->discard && sameGroup) {
            pLocalReducer->hasUnprocessedRow = false;
1530
            tmpBuffer->num = 0;
H
hzcheng 已提交
1531
          } else {
S
slguan 已提交
1532
            // current row does not belong to the previous group, so it has not been handled yet.
H
hzcheng 已提交
1533 1534 1535
            pLocalReducer->hasUnprocessedRow = true;
          }

1536
          resetOutputBuf(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1537 1538
          pOneDataSrc->rowIdx += 1;

S
slguan 已提交
1539 1540
          // here we do not check the return value
          adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);
H
hzcheng 已提交
1541 1542 1543
          assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS);

          if (pRes->numOfRows == 0) {
S
slguan 已提交
1544
            handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1545 1546

            if (!sameGroup) {
S
slguan 已提交
1547 1548 1549 1550 1551 1552 1553 1554 1555 1556
              /*
               * previous group is done, prepare for the next group
               * If previous group is not skipped, keep it in pRes->numOfGroups
               */
              if (notSkipped && saveGroupResultInfo(pSql)) {
                pLocalReducer->status = TSC_LOCALREDUCE_READY;
                return TSDB_CODE_SUCCESS;
              }

              resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
H
hzcheng 已提交
1557 1558 1559 1560 1561 1562 1563
            }
          } else {
            /*
             * if next record belongs to a new group, we do not handle this record here.
             * We start the process in a new round.
             */
            if (sameGroup) {
S
slguan 已提交
1564
              handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1565 1566 1567
            }
          }

S
slguan 已提交
1568 1569 1570 1571 1572 1573
          // current group has no result,
          if (pRes->numOfRows == 0) {
            continue;
          } else {
            pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
            return TSDB_CODE_SUCCESS;
H
hzcheng 已提交
1574
          }
S
slguan 已提交
1575
        } else {  // result buffer is not full
H
hjxilinx 已提交
1576
          doProcessResultInNextWindow(pSql, numOfRes);
S
slguan 已提交
1577
          savePreviousRow(pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1578 1579
        }
      }
S
slguan 已提交
1580
    } else {
S
slguan 已提交
1581
      doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
S
slguan 已提交
1582
      savePreviousRow(pLocalReducer, tmpBuffer);  // copy the processed row to buffer
H
hzcheng 已提交
1583 1584 1585
    }

    pOneDataSrc->rowIdx += 1;
S
slguan 已提交
1586
    adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);
H
hzcheng 已提交
1587 1588 1589
  }

  if (pLocalReducer->hasPrevRow) {
1590
    finalizeRes(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1591 1592
  }

1593
  if (pLocalReducer->pResultBuf->num) {
H
hzcheng 已提交
1594 1595 1596 1597
    doGenerateFinalResults(pSql, pLocalReducer, true);
  }

  assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS && pRes->row == 0);
S
slguan 已提交
1598
  pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
H
hzcheng 已提交
1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616

  return TSDB_CODE_SUCCESS;
}

/**
 * Prepare the result object of a locally-executed query so that it can hold
 * numOfRes rows of rowLen bytes each.
 *
 * If a local reducer is already attached to the result, tscDestroyLocalReducer()
 * is called on the SQL object first. A new zero-filled SLocalReducer and its
 * result page are then allocated, and pRes->data is pointed at the data area
 * of that page.
 *
 * If either allocation fails, nothing is left half-initialized: both
 * pRes->pLocalReducer and pRes->data are NULL when the function returns.
 *
 * @param pObj      SQL object whose result (pObj->res) is initialized
 * @param numOfRes  number of result rows; stored in pResultBuf->num
 * @param rowLen    size in bytes of one result row
 */
void tscInitResObjForLocalQuery(SSqlObj *pObj, int32_t numOfRes, int32_t rowLen) {
  SSqlRes *pRes = &pObj->res;
  if (pRes->pLocalReducer != NULL) {
    tscDestroyLocalReducer(pObj);
  }

  pRes->qhandle = 1;  // hack to pass the safety check in fetch_row function
  pRes->numOfRows = 0;
  pRes->row = 0;

  pRes->rspType = 0;  // used as a flag to denote if taos_retrieved() has been called yet

  // build the reducer in a local first, so the result object only ever sees a complete one
  SLocalReducer *pReducer = (SLocalReducer *)calloc(1, sizeof(SLocalReducer));
  if (pReducer == NULL) {
    pRes->pLocalReducer = NULL;
    pRes->data = NULL;
    return;
  }

  /*
   * we need one additional byte space
   * the sprintf function needs one additional space to put '\0' at the end of string
   *
   * the product is computed in size_t: multiplying the two int32_t values directly
   * could overflow before the result is widened.
   */
  size_t allocSize = (size_t)numOfRes * (size_t)rowLen + sizeof(tFilePage) + 1;
  pReducer->pResultBuf = (tFilePage *)calloc(1, allocSize);
  if (pReducer->pResultBuf == NULL) {
    free(pReducer);
    pRes->pLocalReducer = NULL;
    pRes->data = NULL;
    return;
  }

  pReducer->pResultBuf->num = numOfRes;

  pRes->pLocalReducer = pReducer;
  pRes->data = pReducer->pResultBuf->data;
}