tscLocalMerge.c 56.3 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

16
#include "os.h"
S
slguan 已提交
17
#include "tlosertree.h"
H
hzcheng 已提交
18
#include "tscUtil.h"
H
hjxilinx 已提交
19
#include "tschemautil.h"
S
slguan 已提交
20
#include "tsclient.h"
H
hzcheng 已提交
21
#include "tutil.h"
S
slguan 已提交
22
#include "tscLog.h"
23
#include "tscLocalMerge.h"
H
hzcheng 已提交
24 25

typedef struct SCompareParam {
S
slguan 已提交
26 27
  SLocalDataSource **pLocalData;
  tOrderDescriptor * pDesc;
28
  int32_t            num;
S
slguan 已提交
29
  int32_t            groupOrderType;
H
hzcheng 已提交
30 31 32 33 34 35
} SCompareParam;

int32_t treeComparator(const void *pLeft, const void *pRight, void *param) {
  int32_t pLeftIdx = *(int32_t *)pLeft;
  int32_t pRightIdx = *(int32_t *)pRight;

S
slguan 已提交
36 37 38
  SCompareParam *    pParam = (SCompareParam *)param;
  tOrderDescriptor * pDesc = pParam->pDesc;
  SLocalDataSource **pLocalData = pParam->pLocalData;
H
hzcheng 已提交
39 40 41 42 43 44 45 46 47 48

  /* this input is exhausted, set the special value to denote this */
  if (pLocalData[pLeftIdx]->rowIdx == -1) {
    return 1;
  }

  if (pLocalData[pRightIdx]->rowIdx == -1) {
    return -1;
  }

49
  if (pParam->groupOrderType == TSDB_ORDER_DESC) {  // desc
50 51
    return compare_d(pDesc, pParam->num, pLocalData[pLeftIdx]->rowIdx, pLocalData[pLeftIdx]->filePage.data,
                     pParam->num, pLocalData[pRightIdx]->rowIdx, pLocalData[pRightIdx]->filePage.data);
H
hzcheng 已提交
52
  } else {
53 54
    return compare_a(pDesc, pParam->num, pLocalData[pLeftIdx]->rowIdx, pLocalData[pLeftIdx]->filePage.data,
                     pParam->num, pLocalData[pRightIdx]->rowIdx, pLocalData[pRightIdx]->filePage.data);
H
hzcheng 已提交
55 56 57
  }
}

H
hjLiao 已提交
58
/*
 * Initialise one SQLFunctionCtx and one SResultInfo per query expression for the
 * second-stage (local) merge, then attach the list of tag/timestamp dummy columns
 * to the selectivity function context, if there is one.
 *
 * @param pCmd      command whose current clause (pCmd->clauseIndex) supplies the query info
 * @param pReducer  local reducer; its pCtx, pResInfo, pResultBuf, pTempBuffer and resColModel
 *                  members must already be set up by the caller
 * @param pDesc     order descriptor; its column model describes the layout of the input row
 *
 * The function has no return value: allocation failures cannot be reported to the caller.
 */
static void tscInitSqlContext(SSqlCmd *pCmd, SLocalReducer *pReducer, tOrderDescriptor *pDesc) {
  /*
   * the fields and offset attributes in pCmd and pModel may be different due to
   * merge requirement. So, the final result in pRes structure is formatted in accordance with the pCmd object.
   */
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  int32_t     numOfExprs = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t i = 0; i < numOfExprs; ++i) {
    SQLFunctionCtx *pCtx = &pReducer->pCtx[i];
    SSqlExpr *      pExpr = tscSqlExprGet(pQueryInfo, i);

    pCtx->aOutputBuf =
        pReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pReducer->resColModel->capacity;
    pCtx->order = pQueryInfo->order.order;
    pCtx->functionId = pExpr->functionId;

    // input buffer hold only one point data
    int16_t  offset = getColumnModelOffset(pDesc->pColumnModel, i);
    SSchema *pSchema = getColumnModelSchema(pDesc->pColumnModel, i);

    pCtx->aInputElemBuf = pReducer->pTempBuffer->data + offset;

    // input data format comes from pModel
    pCtx->inputType = pSchema->type;
    pCtx->inputBytes = pSchema->bytes;

    // output data format yet comes from pCmd.
    pCtx->outputBytes = pExpr->resBytes;
    pCtx->outputType = pExpr->resType;

    pCtx->startOffset = 0;
    pCtx->size = 1;
    pCtx->hasNull = true;
    pCtx->currentStage = SECONDARY_STAGE_MERGE;

    // for top/bottom function, the output of timestamp is the first column
    int32_t functionId = pExpr->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pReducer->pCtx[0].aOutputBuf;
      pCtx->param[2].i64Key = pQueryInfo->order.order;
      pCtx->param[2].nType  = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[1].i64Key = pQueryInfo->order.orderColId;
    }

    SResultInfo *pResInfo = &pReducer->pResInfo[i];
    pResInfo->bufLen = pExpr->interBytes;
    // NOTE(review): the allocation result is stored unchecked; this function cannot report a failure.
    pResInfo->interResultBuf = calloc(1, (size_t) pResInfo->bufLen);

    pCtx->resultInfo = pResInfo;
    pCtx->resultInfo->superTableQ = true;
  }

  int16_t          n = 0;
  int16_t          tagLen = 0;
  SQLFunctionCtx **pTagCtx = calloc(pQueryInfo->fieldsInfo.numOfOutput, POINTER_BYTES);
  if (pTagCtx == NULL) {
    // either there is no output column at all or the allocation failed; nothing can be attached
    if (pQueryInfo->fieldsInfo.numOfOutput > 0) {
      tscError("failed to allocate the tag context list, out of memory");
    }
    return;
  }

  // collect the tag/ts dummy columns and remember the (last) selectivity function context
  SQLFunctionCtx *pSelectivityCtx = NULL;
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
    if (pExpr->functionId == TSDB_FUNC_TAG_DUMMY || pExpr->functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pExpr->resBytes;
      pTagCtx[n++] = &pReducer->pCtx[i];
    } else if ((aAggs[pExpr->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pSelectivityCtx = &pReducer->pCtx[i];
    }
  }

  /*
   * The list is only useful when there are dummy columns AND a selectivity function to own it.
   * Without the second check a query that has dummy columns but no selectivity function
   * would dereference a NULL context.
   */
  if (n == 0 || pSelectivityCtx == NULL) {
    free(pTagCtx);
  } else {
    pSelectivityCtx->tagInfo.pTagCtxList = pTagCtx;
    pSelectivityCtx->tagInfo.numOfTagCols = n;
    pSelectivityCtx->tagInfo.tagsLen = tagLen;
  }
}

135 136 137 138 139 140 141 142 143 144 145 146 147
/*
 * Build the array of fill-column descriptors, one entry per query expression.
 *
 * Each entry copies the result type/size, column flag, function id and fill value of the
 * expression; col.offset is the running sum of the result sizes of all preceding expressions.
 *
 * @param pQueryInfo  query whose expressions and fillVal[] array are read
 * @return a calloc'ed array owned by the caller, or NULL if the allocation fails
 */
static SFillColInfo* createFillColInfo(SQueryInfo* pQueryInfo) {
  int32_t numOfCols = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i);

    pFillCol[i].col.bytes  = pExpr->resBytes;
    pFillCol[i].col.type   = pExpr->resType;
    pFillCol[i].flag       = pExpr->colInfo.flag;
    pFillCol[i].col.offset = offset;
    pFillCol[i].functionId = pExpr->functionId;
    pFillCol[i].fillVal.i  = pQueryInfo->fillVal[i];
    offset += pExpr->resBytes;
  }

  return pFillCol;
}

H
hzcheng 已提交
155
void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc,
H
hjLiao 已提交
156 157 158 159
                           SColumnModel *finalmodel, SSqlObj* pSql) {
  SSqlCmd* pCmd = &pSql->cmd;
  SSqlRes* pRes = &pSql->res;
  
160
  if (pMemBuffer == NULL) {
H
hjLiao 已提交
161 162 163
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
  
    tscError("%p pMemBuffer is NULL", pMemBuffer);
164
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
165 166 167 168
    return;
  }
 
  if (pDesc->pColumnModel == NULL) {
H
hzcheng 已提交
169 170
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);

H
hjLiao 已提交
171
    tscError("%p no local buffer or intermediate result format model", pSql);
172
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
H
hzcheng 已提交
173 174 175 176 177 178 179
    return;
  }

  int32_t numOfFlush = 0;
  for (int32_t i = 0; i < numOfBuffer; ++i) {
    int32_t len = pMemBuffer[i]->fileMeta.flushoutData.nLength;
    if (len == 0) {
H
hjLiao 已提交
180
      tscTrace("%p no data retrieved from orderOfVnode:%d", pSql, i + 1);
H
hzcheng 已提交
181 182 183 184 185 186 187 188
      continue;
    }

    numOfFlush += len;
  }

  if (numOfFlush == 0 || numOfBuffer == 0) {
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
H
hjLiao 已提交
189
    tscTrace("%p retrieved no data", pSql);
S
slguan 已提交
190

H
hzcheng 已提交
191 192 193
    return;
  }

H
hjxilinx 已提交
194
  if (pDesc->pColumnModel->capacity >= pMemBuffer[0]->pageSize) {
H
hjLiao 已提交
195
    tscError("%p Invalid value of buffer capacity %d and page size %d ", pSql, pDesc->pColumnModel->capacity,
H
hjxilinx 已提交
196
             pMemBuffer[0]->pageSize);
S
slguan 已提交
197 198

    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
199
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
H
hzcheng 已提交
200 201 202
    return;
  }

H
hjLiao 已提交
203 204 205
  size_t size = sizeof(SLocalReducer) + POINTER_BYTES * numOfFlush;
  
  SLocalReducer *pReducer = (SLocalReducer *) calloc(1, size);
H
hzcheng 已提交
206
  if (pReducer == NULL) {
H
hjLiao 已提交
207
    tscError("%p failed to create local merge structure, out of memory", pSql);
S
slguan 已提交
208 209

    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
210
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
211 212 213 214
    return;
  }

  pReducer->pExtMemBuffer = pMemBuffer;
S
slguan 已提交
215
  pReducer->pLocalDataSrc = (SLocalDataSource **)&pReducer[1];
H
hzcheng 已提交
216 217 218 219
  assert(pReducer->pLocalDataSrc != NULL);

  pReducer->numOfBuffer = numOfFlush;
  pReducer->numOfVnode = numOfBuffer;
220

H
hzcheng 已提交
221
  pReducer->pDesc = pDesc;
H
hjLiao 已提交
222
  tscTrace("%p the number of merged leaves is: %d", pSql, pReducer->numOfBuffer);
H
hzcheng 已提交
223 224 225 226 227 228

  int32_t idx = 0;
  for (int32_t i = 0; i < numOfBuffer; ++i) {
    int32_t numOfFlushoutInFile = pMemBuffer[i]->fileMeta.flushoutData.nLength;

    for (int32_t j = 0; j < numOfFlushoutInFile; ++j) {
H
hjLiao 已提交
229 230 231
      SLocalDataSource *ds = (SLocalDataSource *)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->pageSize);
      if (ds == NULL) {
        tscError("%p failed to create merge structure", pSql);
232
        pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
B
Bomin Zhang 已提交
233
        tfree(pReducer);
H
hzcheng 已提交
234 235
        return;
      }
H
hjLiao 已提交
236 237
      
      pReducer->pLocalDataSrc[idx] = ds;
H
hzcheng 已提交
238

H
hjLiao 已提交
239 240
      ds->pMemBuffer = pMemBuffer[i];
      ds->flushoutIdx = j;
241
      ds->filePage.num = 0;
H
hjLiao 已提交
242 243
      ds->pageId = 0;
      ds->rowIdx = 0;
H
hzcheng 已提交
244

H
hjLiao 已提交
245 246
      tscTrace("%p load data from disk into memory, orderOfVnode:%d, total:%d", pSql, i + 1, idx + 1);
      tExtMemBufferLoadData(pMemBuffer[i], &(ds->filePage), j, 0);
H
hzcheng 已提交
247
#ifdef _DEBUG_VIEW
248
      printf("load data page into mem for build loser tree: %" PRIu64 " rows\n", ds->filePage.num);
H
hzcheng 已提交
249
      SSrcColumnInfo colInfo[256] = {0};
H
hjxilinx 已提交
250
      SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
251 252

      tscGetSrcColumnInfo(colInfo, pQueryInfo);
H
hzcheng 已提交
253

254
      tColModelDisplayEx(pDesc->pColumnModel, ds->filePage.data, ds->filePage.num,
H
hjxilinx 已提交
255
                         pMemBuffer[0]->numOfElemsPerPage, colInfo);
H
hzcheng 已提交
256
#endif
H
hjLiao 已提交
257
      
258
      if (ds->filePage.num == 0) {  // no data in this flush, the index does not increase
H
hjLiao 已提交
259 260
        tscTrace("%p flush data is empty, ignore %d flush record", pSql, idx);
        tfree(ds);
H
hzcheng 已提交
261 262
        continue;
      }
H
hjLiao 已提交
263
      
H
hzcheng 已提交
264 265 266
      idx += 1;
    }
  }
H
hjLiao 已提交
267 268
  
  // no data actually, no need to merge result.
H
hzcheng 已提交
269
  if (idx == 0) {
B
Bomin Zhang 已提交
270
    tfree(pReducer);
H
hzcheng 已提交
271 272 273 274 275 276 277 278
    return;
  }

  pReducer->numOfBuffer = idx;

  SCompareParam *param = malloc(sizeof(SCompareParam));
  param->pLocalData = pReducer->pLocalDataSrc;
  param->pDesc = pReducer->pDesc;
279
  param->num = pReducer->pLocalDataSrc[0]->pMemBuffer->numOfElemsPerPage;
H
hjxilinx 已提交
280 281
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

282
  param->groupOrderType = pQueryInfo->groupbyExpr.orderType;
H
Haojun Liao 已提交
283
  pReducer->orderPrjOnSTable = tscOrderedProjectionQueryOnSTable(pQueryInfo, 0);
H
hzcheng 已提交
284 285 286

  pRes->code = tLoserTreeCreate(&pReducer->pLoserTree, pReducer->numOfBuffer, param, treeComparator);
  if (pReducer->pLoserTree == NULL || pRes->code != 0) {
B
Bomin Zhang 已提交
287
    tfree(pReducer);
H
hzcheng 已提交
288 289 290 291 292
    return;
  }

  // the input data format follows the old format, but output in a new format.
  // so, all the input must be parsed as old format
H
hjLiao 已提交
293
  pReducer->pCtx = (SQLFunctionCtx *)calloc(tscSqlExprNumOfExprs(pQueryInfo), sizeof(SQLFunctionCtx));
H
hzcheng 已提交
294 295
  pReducer->rowSize = pMemBuffer[0]->nElemSize;

H
hjxilinx 已提交
296 297
  tscRestoreSQLFuncForSTableQuery(pQueryInfo);
  tscFieldInfoUpdateOffset(pQueryInfo);
H
hzcheng 已提交
298

H
hjxilinx 已提交
299
  if (pReducer->rowSize > pMemBuffer[0]->pageSize) {
H
hzcheng 已提交
300 301 302 303 304 305 306 307
    assert(false);  // todo fixed row size is larger than the minimum page size;
  }

  pReducer->hasPrevRow = false;
  pReducer->hasUnprocessedRow = false;

  pReducer->prevRowOfInput = (char *)calloc(1, pReducer->rowSize);

S
slguan 已提交
308
  // used to keep the latest input row
H
hzcheng 已提交
309 310 311 312
  pReducer->pTempBuffer = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage));
  pReducer->discardData = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage));
  pReducer->discard = false;

H
hjxilinx 已提交
313
  pReducer->nResultBufSize = pMemBuffer[0]->pageSize * 16;
H
hzcheng 已提交
314
  pReducer->pResultBuf = (tFilePage *)calloc(1, pReducer->nResultBufSize + sizeof(tFilePage));
H
hjxilinx 已提交
315

H
Haojun Liao 已提交
316
  pReducer->finalRowSize = tscGetResRowLength(pQueryInfo->exprList);
H
hzcheng 已提交
317
  pReducer->resColModel = finalmodel;
H
Haojun Liao 已提交
318 319
  pReducer->resColModel->capacity = pReducer->nResultBufSize / pReducer->finalRowSize;
  assert(pReducer->finalRowSize <= pReducer->rowSize);
H
hzcheng 已提交
320

H
hjxilinx 已提交
321
  pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->capacity);
322
//  pReducer->pBufForInterpo = calloc(1, pReducer->nResultBufSize);
H
hzcheng 已提交
323

H
hjxilinx 已提交
324
  if (pReducer->pTempBuffer == NULL || pReducer->discardData == NULL || pReducer->pResultBuf == NULL ||
325
      /*pReducer->pBufForInterpo == NULL || */pReducer->pFinalRes == NULL || pReducer->prevRowOfInput == NULL) {
S
slguan 已提交
326 327 328
    tfree(pReducer->pTempBuffer);
    tfree(pReducer->discardData);
    tfree(pReducer->pResultBuf);
H
hjxilinx 已提交
329
    tfree(pReducer->pFinalRes);
S
slguan 已提交
330
    tfree(pReducer->prevRowOfInput);
B
Bomin Zhang 已提交
331
    tfree(pReducer);
332
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
333 334
    return;
  }
H
hjLiao 已提交
335
  
336 337 338 339
  size_t numOfCols = tscSqlExprNumOfExprs(pQueryInfo);
  
  pReducer->pTempBuffer->num = 0;
  pReducer->pResInfo = calloc(numOfCols, sizeof(SResultInfo));
H
hzcheng 已提交
340

341
  tscCreateResPointerInfo(pRes, pQueryInfo);
H
hjLiao 已提交
342
  tscInitSqlContext(pCmd, pReducer, pDesc);
H
hzcheng 已提交
343

H
hjxilinx 已提交
344 345
  // we change the capacity of schema to denote that there is only one row in temp buffer
  pReducer->pDesc->pColumnModel->capacity = 1;
H
hjxilinx 已提交
346 347

  // restore the limitation value at the last stage
348 349 350 351
  if (tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) {
    pQueryInfo->limit.limit = pQueryInfo->clauseLimit;
    pQueryInfo->limit.offset = pQueryInfo->prjOffset;
  }
H
hjxilinx 已提交
352

353
  pReducer->offset = pQueryInfo->limit.offset;
H
hjxilinx 已提交
354

H
hzcheng 已提交
355 356 357
  pRes->pLocalReducer = pReducer;
  pRes->numOfGroups = 0;

358
  STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0);
359
  STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);
H
hjxilinx 已提交
360
  
361
  TSKEY stime = MIN(pQueryInfo->window.skey, pQueryInfo->window.ekey);
H
hjxilinx 已提交
362
  int64_t revisedSTime =
363 364 365 366 367
      taosGetIntervalStartTimestamp(stime, pQueryInfo->intervalTime, pQueryInfo->slidingTimeUnit, tinfo.precision);
  
  if (pQueryInfo->fillType != TSDB_FILL_NONE) {
    SFillColInfo* pFillCol = createFillColInfo(pQueryInfo);
    pReducer->pFillInfo = taosInitFillInfo(pQueryInfo->order.order, revisedSTime, pQueryInfo->groupbyExpr.numOfGroupCols,
H
Haojun Liao 已提交
368 369
                                           4096, numOfCols, pQueryInfo->slidingTime, pQueryInfo->slidingTimeUnit,
                                           tinfo.precision, pQueryInfo->fillType, pFillCol);
370
  }
H
hzcheng 已提交
371

H
hjxilinx 已提交
372
  int32_t startIndex = pQueryInfo->fieldsInfo.numOfOutput - pQueryInfo->groupbyExpr.numOfGroupCols;
H
hzcheng 已提交
373

H
Haojun Liao 已提交
374
  if (pQueryInfo->groupbyExpr.numOfGroupCols > 0 && pReducer->pFillInfo != NULL) {
375
    pReducer->pFillInfo->pTags[0] = (char *)pReducer->pFillInfo->pTags + POINTER_BYTES * pQueryInfo->groupbyExpr.numOfGroupCols;
376
    for (int32_t i = 1; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) {
H
hjxilinx 已提交
377
      SSchema *pSchema = getColumnModelSchema(pReducer->resColModel, startIndex + i - 1);
378
      pReducer->pFillInfo->pTags[i] = pSchema->bytes + pReducer->pFillInfo->pTags[i - 1];
H
hzcheng 已提交
379 380
    }
  } else {
381 382 383
    if (pReducer->pFillInfo != NULL) {
      assert(pReducer->pFillInfo->pTags == NULL);
    }
H
hzcheng 已提交
384 385 386 387 388
  }
}

/*
 * Sort the rows held in pPage (when the descriptor has order columns) and append them to
 * the external memory buffer.
 *
 * @param pMemoryBuf  destination buffer
 * @param pDesc       order descriptor; its column model capacity bounds pPage->num
 * @param pPage       page holding the rows; pPage->num is reset to 0 after a successful put
 * @param orderType   sort order forwarded to tColDataQSort()
 * @return 0 on success or when the page is empty, -1 if tExtMemBufferPut() reports an error
 */
static int32_t tscFlushTmpBufferImpl(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage,
                                     int32_t orderType) {
  // nothing buffered, nothing to do
  if (pPage->num == 0) {
    return 0;
  }

  assert(pPage->num <= pDesc->pColumnModel->capacity);

  // the rows are laid out consecutively in the page, so they can be sorted in place before being written
  if (pDesc->orderInfo.numOfCols > 0) {
    tColDataQSort(pDesc, pPage->num, 0, pPage->num - 1, pPage->data, orderType);
  }

#ifdef _DEBUG_VIEW
  printf("%" PRIu64 " rows data flushed to disk after been sorted:\n", pPage->num);
  tColModelDisplay(pDesc->pColumnModel, pPage->data, pPage->num, pPage->num);
#endif

  // hand the sorted rows over to the buffer
  if (tExtMemBufferPut(pMemoryBuf, pPage->data, pPage->num) >= 0) {
    pPage->num = 0;
    return 0;
  }

  tscError("failed to save data in temporary buffer");
  return -1;
}

/*
 * Move the rows of pPage into the external memory buffer (see tscFlushTmpBufferImpl()) and
 * then flush that buffer with tExtMemBufferFlush().
 *
 * @return 0 on success; otherwise the non-zero result of the first step that failed
 */
int32_t tscFlushTmpBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, int32_t orderType) {
  int32_t code = tscFlushTmpBufferImpl(pMemoryBuf, pDesc, pPage, orderType);

  // the buffer is only flushed when the rows were stored successfully
  if (code == 0) {
    code = tExtMemBufferFlush(pMemoryBuf);
  }

  return code;
}

/*
 * Append numOfRows rows from data to pPage; whenever the page reaches the capacity of the
 * column model it is flushed through tscFlushTmpBuffer() and filling continues.
 *
 * @param pMemoryBuf  external buffer that receives full pages
 * @param pDesc       order descriptor; pDesc->pColumnModel->capacity is the page capacity in rows
 * @param pPage       page being filled
 * @param data        source rows, handed to tColModelAppend()
 * @param numOfRows   number of rows available in data
 * @param orderType   sort order forwarded to tscFlushTmpBuffer()
 * @return 0 on success, -1 if a flush fails
 */
int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, void *data,
                     int32_t numOfRows, int32_t orderType) {
  SColumnModel *pColModel = pDesc->pColumnModel;

  // everything fits into the current page
  if (pPage->num + numOfRows <= pColModel->capacity) {
    tColModelAppend(pColModel, pPage, data, 0, numOfRows, numOfRows);
    return 0;
  }

  // the page would overflow: top it up first, then flush it
  int32_t numOfRemainEntries = pColModel->capacity - pPage->num;
  tColModelAppend(pColModel, pPage, data, 0, numOfRemainEntries, numOfRows);

  assert(pPage->num == pColModel->capacity);
  if (tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType) != 0) {
    return -1;
  }

  // numOfRemainEntries doubles as the count of source rows consumed so far
  while (numOfRemainEntries < numOfRows) {
    int32_t remain = numOfRows - numOfRemainEntries;
    int32_t numOfWriteElems = (remain > pColModel->capacity) ? pColModel->capacity : remain;

    tColModelAppend(pColModel, pPage, data, numOfRemainEntries, numOfWriteElems, numOfRows);

    if (pPage->num != pColModel->capacity) {
      pPage->num = numOfWriteElems;
    } else if (tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType) != TSDB_CODE_SUCCESS) {
      return -1;
    }

    numOfRemainEntries += numOfWriteElems;
  }

  return 0;
}

/*
 * Detach the local reducer from pSql->res and release it together with everything it owns,
 * including the merge environment stored in it.
 *
 * The reducer pointer is taken with an atomic exchange, and the reducer status is switched
 * from TSC_LOCALREDUCE_READY to TSC_LOCALREDUCE_TOBE_FREED with a compare-and-exchange; while
 * the observed status is TSC_LOCALREDUCE_IN_PROGRESS the call sleeps 100 ms and retries.
 *
 * @param pSql  sql object; NULL is accepted and ignored
 */
void tscDestroyLocalReducer(SSqlObj *pSql) {
  if (pSql == NULL) {
    return;
  }

  tscTrace("%p start to free local reducer", pSql);
  SSqlRes *pRes = &(pSql->res);
  if (pRes->pLocalReducer == NULL) {
    tscTrace("%p local reducer has been freed, abort", pSql);
    return;
  }

  SSqlCmd *   pCmd = &pSql->cmd;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // there is no more result, so we release all allocated resource
  SLocalReducer *pLocalReducer = (SLocalReducer *)atomic_exchange_ptr(&pRes->pLocalReducer, NULL);
  if (pLocalReducer == NULL) {
    tscTrace("%p already freed or another free function is invoked", pSql);
    tscTrace("%p free local reducer finished", pSql);
    return;
  }

  // wait until the reducer is no longer in use
  for (;;) {
    int32_t status = atomic_val_compare_exchange_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY,
                                                    TSC_LOCALREDUCE_TOBE_FREED);
    if (status != TSC_LOCALREDUCE_IN_PROGRESS) {
      break;
    }

    taosMsleep(100);
    tscTrace("%p waiting for delete procedure, status: %d", pSql, status);
  }

  pLocalReducer->pFillInfo = taosDestoryFillInfo(pLocalReducer->pFillInfo);

  // per-column function contexts
  if (pLocalReducer->pCtx != NULL) {
    for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[i];

      tVariantDestroy(&pCtx->tag);
      tfree(pCtx->tagInfo.pTagCtxList);
    }

    tfree(pLocalReducer->pCtx);
  }

  tfree(pLocalReducer->prevRowOfInput);

  tfree(pLocalReducer->pTempBuffer);
  tfree(pLocalReducer->pResultBuf);

  // per-column result info and the intermediate buffers attached to it
  if (pLocalReducer->pResInfo != NULL) {
    for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
      tfree(pLocalReducer->pResInfo[i].interResultBuf);
    }

    tfree(pLocalReducer->pResInfo);
  }

  // the loser tree and the comparator parameter it carries
  if (pLocalReducer->pLoserTree) {
    tfree(pLocalReducer->pLoserTree->param);
    tfree(pLocalReducer->pLoserTree);
  }

  tfree(pLocalReducer->pFinalRes);
  tfree(pLocalReducer->discardData);

  tscLocalReducerEnvDestroy(pLocalReducer->pExtMemBuffer, pLocalReducer->pDesc, pLocalReducer->resColModel,
                            pLocalReducer->numOfVnode);

  // the data source slots sit in the same allocation as the reducer, so free them before it
  for (int32_t i = 0; i < pLocalReducer->numOfBuffer; ++i) {
    tfree(pLocalReducer->pLocalDataSrc[i]);
  }

  pLocalReducer->numOfBuffer = 0;
  pLocalReducer->numOfCompleted = 0;
  free(pLocalReducer);

  tscTrace("%p free local reducer finished", pSql);
}

H
hjxilinx 已提交
552
/*
 * Build the order descriptor that drives sorting/merging of the intermediate result.
 *
 * The order columns are the group-by columns, which occupy the last
 * groupbyExpr.numOfGroupCols output fields, plus one extra column when the query has an
 * interval or is an ordered projection on a super table.
 *
 * @param pOrderDesc  receives the descriptor created by tOrderDesCreate()
 * @param pCmd        command whose current clause supplies the query info
 * @param pModel      column model of the intermediate result, handed to tOrderDesCreate()
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_TSC_OUT_OF_MEMORY if an allocation fails
 */
static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCmd, SColumnModel *pModel) {
  int32_t     numOfGroupByCols = 0;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) {
    numOfGroupByCols = pQueryInfo->groupbyExpr.numOfGroupCols;
  }

  // primary timestamp column is involved in final result
  if (pQueryInfo->intervalTime != 0 || tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) {
    numOfGroupByCols++;
  }

  /*
   * Always request at least one slot: calloc() with a zero element count may legally return
   * NULL, which must not be reported as out-of-memory when there are no order columns.
   * The zero initialisation is relied upon for the slot that is not assigned below.
   */
  int32_t *orderIdx = (int32_t *)calloc(numOfGroupByCols > 0 ? numOfGroupByCols : 1, sizeof(int32_t));
  if (orderIdx == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

  if (numOfGroupByCols > 0) {
    int32_t startCols = pQueryInfo->fieldsInfo.numOfOutput - pQueryInfo->groupbyExpr.numOfGroupCols;

    // tags value locate at the last columns
    for (int32_t i = 0; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) {
      orderIdx[i] = startCols++;
    }

    if (pQueryInfo->intervalTime != 0) {
      // the first column is the timestamp, handles queries like "interval(10m) group by tags"
      orderIdx[numOfGroupByCols - 1] = PRIMARYKEY_TIMESTAMP_COL_INDEX;
    }
  }

  *pOrderDesc = tOrderDesCreate(orderIdx, numOfGroupByCols, pModel, pQueryInfo->order.order);
  tfree(orderIdx);

  if (*pOrderDesc == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

  return TSDB_CODE_SUCCESS;
}

S
slguan 已提交
594
/*
 * Decide whether the row in tmpBuffer belongs to the same group as the previous row.
 *
 * @param pCmd       command whose current clause supplies the query info (only read by the asserts)
 * @param pReducer   local reducer; pCtx[0].functionId, orderPrjOnSTable and pDesc are read
 * @param pPrev      the previous input row
 * @param tmpBuffer  page holding the current input row
 * @return true  for an ordered projection on a super table, when there are no order columns,
 *               when the only order column is the primary timestamp, or when both rows are
 *               byte-wise equal from the offset of the first order column to the end of the row;
 *         false for a plain projection/arithmetic query or when the compared bytes differ.
 */
bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage *tmpBuffer) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // disable merge procedure for column projection query
  int16_t firstFuncId = pReducer->pCtx[0].functionId;
  assert(firstFuncId != TSDB_FUNC_ARITHM);

  if (pReducer->orderPrjOnSTable) {
    return true;
  }

  if (firstFuncId == TSDB_FUNC_PRJ || firstFuncId == TSDB_FUNC_ARITHM) {
    return false;
  }

  tOrderDescriptor *pOrderDesc = pReducer->pDesc;
  SColumnOrderInfo *pOrderCols = &pOrderDesc->orderInfo;

  // no group by columns, all data belongs to one group
  int32_t numOfOrderCols = pOrderCols->numOfCols;
  if (numOfOrderCols <= 0) {
    return true;
  }

  if (pOrderCols->pData[numOfOrderCols - 1] != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    // simple group by query
    assert(pQueryInfo->intervalTime == 0);
  } else {
    /*
     * super table interval query
     * if the order columns is the primary timestamp, all result data belongs to one group
     */
    assert(pQueryInfo->intervalTime > 0);
    if (numOfOrderCols == 1) {
      return true;
    }
  }

  // compare both rows from the first order column up to the end of the row
  SColumnModel *pColModel = pOrderDesc->pColumnModel;
  int32_t       firstOrderCol = pOrderCols->pData[0];
  int32_t       startOffset = pColModel->pFields[firstOrderCol].offset;

  return memcmp(pPrev + startOffset, tmpBuffer->data + startOffset, pColModel->rowSize - startOffset) == 0;
}

int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOrderDescriptor **pOrderDesc,
H
hjxilinx 已提交
639
                                 SColumnModel **pFinalModel, uint32_t nBufferSizes) {
H
hzcheng 已提交
640 641 642
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

H
hjxilinx 已提交
643
  SSchema *     pSchema = NULL;
H
hjxilinx 已提交
644
  SColumnModel *pModel = NULL;
H
hzcheng 已提交
645 646
  *pFinalModel = NULL;

H
hjxilinx 已提交
647
  SQueryInfo *    pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
H
hjxilinx 已提交
648
  STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
S
slguan 已提交
649

650
  (*pMemBuffer) = (tExtMemBuffer **)malloc(POINTER_BYTES * pSql->numOfSubs);
H
hzcheng 已提交
651 652
  if (*pMemBuffer == NULL) {
    tscError("%p failed to allocate memory", pSql);
653
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
654 655
    return pRes->code;
  }
H
hjxilinx 已提交
656 657 658 659
  
  size_t size = tscSqlExprNumOfExprs(pQueryInfo);
  
  pSchema = (SSchema *)calloc(1, sizeof(SSchema) * size);
H
hzcheng 已提交
660 661
  if (pSchema == NULL) {
    tscError("%p failed to allocate memory", pSql);
662
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
663 664 665 666
    return pRes->code;
  }

  int32_t rlen = 0;
H
hjxilinx 已提交
667
  for (int32_t i = 0; i < size; ++i) {
668
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
H
hzcheng 已提交
669 670 671 672 673 674 675

    pSchema[i].bytes = pExpr->resBytes;
    pSchema[i].type = pExpr->resType;

    rlen += pExpr->resBytes;
  }

L
lihui 已提交
676
  int32_t capacity = 0;
H
hjxilinx 已提交
677 678 679
  if (rlen != 0) {
    capacity = nBufferSizes / rlen;
  }
H
hjxilinx 已提交
680 681
  
  pModel = createColumnModel(pSchema, size, capacity);
H
hzcheng 已提交
682

H
hjxilinx 已提交
683
  size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups;
684 685 686 687
  for (int32_t i = 0; i < numOfSubs; ++i) {
    (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel);
    (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL;
  }
H
hzcheng 已提交
688 689

  if (createOrderDescriptor(pOrderDesc, pCmd, pModel) != TSDB_CODE_SUCCESS) {
690
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
B
Bomin Zhang 已提交
691
    tfree(pSchema);
H
hzcheng 已提交
692 693 694
    return pRes->code;
  }

H
hjxilinx 已提交
695
  // final result depends on the fields number
H
hjxilinx 已提交
696 697
  memset(pSchema, 0, sizeof(SSchema) * size);
  for (int32_t i = 0; i < size; ++i) {
H
hjxilinx 已提交
698 699
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);

700
    SSchema *p1 = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pExpr->colInfo.colIndex);
H
hjxilinx 已提交
701

702
    int32_t inter = 0;
H
hjxilinx 已提交
703 704
    int16_t type = -1;
    int16_t bytes = 0;
H
hjxilinx 已提交
705 706 707 708 709 710 711 712 713

    //    if ((pExpr->functionId >= TSDB_FUNC_FIRST_DST && pExpr->functionId <= TSDB_FUNC_LAST_DST) ||
    //        (pExpr->functionId >= TSDB_FUNC_SUM && pExpr->functionId <= TSDB_FUNC_MAX) ||
    //        pExpr->functionId == TSDB_FUNC_LAST_ROW) {
    // the final result size and type in the same as query on single table.
    // so here, set the flag to be false;

    int32_t functionId = pExpr->functionId;
    if (functionId >= TSDB_FUNC_TS && functionId <= TSDB_FUNC_DIFF) {
H
hjxilinx 已提交
714 715
      type = pModel->pFields[i].field.type;
      bytes = pModel->pFields[i].field.bytes;
H
hjxilinx 已提交
716 717 718 719 720 721 722 723
    } else {
      if (functionId == TSDB_FUNC_FIRST_DST) {
        functionId = TSDB_FUNC_FIRST;
      } else if (functionId == TSDB_FUNC_LAST_DST) {
        functionId = TSDB_FUNC_LAST;
      }

      getResultDataInfo(p1->type, p1->bytes, functionId, 0, &type, &bytes, &inter, 0, false);
H
hjxilinx 已提交
724
    }
H
hzcheng 已提交
725

H
hjxilinx 已提交
726 727 728
    pSchema[i].type = type;
    pSchema[i].bytes = bytes;
    strcpy(pSchema[i].name, pModel->pFields[i].field.name);
H
hzcheng 已提交
729
  }
H
hjxilinx 已提交
730 731
  
  *pFinalModel = createColumnModel(pSchema, size, capacity);
H
hzcheng 已提交
732 733 734 735 736 737 738 739 740 741 742
  tfree(pSchema);

  return TSDB_CODE_SUCCESS;
}

/**
 * @param pMemBuffer
 * @param pDesc
 * @param pFinalModel
 * @param numOfVnodes
 */
H
hjxilinx 已提交
743
/*
 * Release the environment prepared for a local reducer: the final column model, the order
 * descriptor, every external memory buffer in pMemBuffer[0 .. numOfVnodes - 1] and finally
 * the pMemBuffer array itself.
 *
 * A NULL pMemBuffer is tolerated: the column model and the order descriptor are still
 * released, and the buffer loop is skipped instead of dereferencing a NULL array.
 */
void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, SColumnModel *pFinalModel,
                               int32_t numOfVnodes) {
  destroyColumnModel(pFinalModel);
  tOrderDescDestroy(pDesc);

  // nothing else to release when the buffer array was never allocated
  if (pMemBuffer == NULL) {
    return;
  }

  for (int32_t i = 0; i < numOfVnodes; ++i) {
    pMemBuffer[i] = destoryExtMemBuffer(pMemBuffer[i]);
  }

  tfree(pMemBuffer);
}

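/**
 * Load the next page of one intermediate data source from its external memory buffer.
 *
 * @param pLocalReducer        local reducer that owns the data source
 * @param pOneInterDataSrc     data source to advance to its next page
 * @param needAdjustLoserTree  output flag; it is set to true whether or not a new page was loaded
 * @return pLocalReducer->numOfBuffer (note: this is not the number of remaining input sources)
 */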
S
slguan 已提交
761
/*
 * Move the data source to the first row of its next page. If the current flush-out still has
 * such a page, it is loaded into pOneInterDataSrc->filePage; otherwise the source is marked as
 * exhausted (rowIdx == -1, pageId == -1) and counted in pLocalReducer->numOfCompleted.
 * In both cases *needAdjustLoserTree is set to true and pLocalReducer->numOfBuffer is returned.
 */
int32_t loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc,
                               bool *needAdjustLoserTree) {
  pOneInterDataSrc->pageId += 1;
  pOneInterDataSrc->rowIdx = 0;

  bool noMorePages =
      !(pOneInterDataSrc->pageId <
        pOneInterDataSrc->pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[pOneInterDataSrc->flushoutIdx].numOfPages);

  if (noMorePages) {
    // this input is used up: flag it with the special index values
    pOneInterDataSrc->rowIdx = -1;
    pOneInterDataSrc->pageId = -1;
    pLocalReducer->numOfCompleted += 1;

    *needAdjustLoserTree = true;
    return pLocalReducer->numOfBuffer;
  }

  tExtMemBufferLoadData(pOneInterDataSrc->pMemBuffer, &(pOneInterDataSrc->filePage), pOneInterDataSrc->flushoutIdx,
                        pOneInterDataSrc->pageId);

#if defined(_DEBUG_VIEW)
  printf("new page load to buffer\n");
  tColModelDisplay(pOneInterDataSrc->pMemBuffer->pColumnModel, pOneInterDataSrc->filePage.data,
                   pOneInterDataSrc->filePage.num, pOneInterDataSrc->pMemBuffer->pColumnModel->capacity);
#endif

  *needAdjustLoserTree = true;
  return pLocalReducer->numOfBuffer;
}

S
slguan 已提交
788 789
/*
 * Called after a row of pOneInterDataSrc has been consumed as the winner of the loser tree.
 * When the row index has run past the rows held in the current page, the next page is loaded
 * first; afterwards the loser tree is adjusted starting from the leaf of the current winner.
 */
void adjustLoserTreeFromNewData(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc,
                                SLoserTreeInfo *pTree) {
  bool needToAdjust = true;

  // the last record of the page in memory has been processed: fetch the following page
  if (pOneInterDataSrc->rowIdx >= pOneInterDataSrc->filePage.num) {
    loadNewDataFromDiskFor(pLocalReducer, pOneInterDataSrc, &needToAdjust);
  }

  // if the loser tree has been rebuilt completely, no adjustment is required
  if (!needToAdjust) {
    return;
  }

  int32_t leafNodeIdx = pTree->pNode[0].index + pLocalReducer->numOfBuffer;

#ifdef _DEBUG_VIEW
  printf("before adjust:\t");
  tLoserTreeDisplay(pTree);
#endif

  tLoserTreeAdjust(pTree, leafNodeIdx);

#ifdef _DEBUG_VIEW
  printf("\nafter adjust:\t");
  tLoserTreeDisplay(pTree);
  printf("\n");
#endif
}

H
Haojun Liao 已提交
821
/*
 * Switch the reducer into "discard" mode: the previous input row is stored in
 * pLocalReducer->discardData so that following rows of the same group can be recognized and
 * dropped. If a fill descriptor is supplied, it is reset to the start timestamp derived from
 * the smaller one of the query window boundaries.
 */
void savePrevRecordAndSetupInterpoInfo(SLocalReducer *pLocalReducer, SQueryInfo *pQueryInfo, SFillInfo *pFillInfo) {
  STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
  STableComInfo   tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);

  if (pFillInfo != NULL) {
    // pick the smaller boundary of the query time window
    int64_t stime = pQueryInfo->window.ekey;
    if (pQueryInfo->window.skey < pQueryInfo->window.ekey) {
      stime = pQueryInfo->window.skey;
    }

    int64_t revisedSTime =
        taosGetIntervalStartTimestamp(stime, pQueryInfo->slidingTime, pQueryInfo->slidingTimeUnit, tinfo.precision);
    taosResetFillInfo(pFillInfo, revisedSTime);
  }

  // remaining rows of the current group are to be discarded
  pLocalReducer->discard = true;
  pLocalReducer->discardData->num = 0;

  // keep exactly one row (the previous input row) as the reference of the discarded group
  tColModelAppend(pLocalReducer->pDesc->pColumnModel, pLocalReducer->discardData, pLocalReducer->prevRowOfInput, 0, 1,
                  1);
}

// todo merge with following function
H
hjxilinx 已提交
843
// static void reversedCopyResultToDstBuf(SQueryInfo* pQueryInfo, SSqlRes *pRes, tFilePage *pFinalDataPage) {
H
hjxilinx 已提交
844
//
H
hjxilinx 已提交
845
//  for (int32_t i = 0; i < pQueryInfo->exprList.numOfExprs; ++i) {
H
hjxilinx 已提交
846
//    TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
847 848 849 850 851 852 853 854 855 856 857 858
//
//    int32_t offset = tscFieldInfoGetOffset(pQueryInfo, i);
//    char *  src = pFinalDataPage->data + (pRes->numOfRows - 1) * pField->bytes + pRes->numOfRows * offset;
//    char *  dst = pRes->data + pRes->numOfRows * offset;
//
//    for (int32_t j = 0; j < pRes->numOfRows; ++j) {
//      memcpy(dst, src, (size_t)pField->bytes);
//      dst += pField->bytes;
//      src -= pField->bytes;
//    }
//  }
//}
H
hzcheng 已提交
859

H
hjxilinx 已提交
860 861
/*
 * Copy every output column from pResPages into pRes->data in reversed row order, i.e. the last
 * row of a source column becomes the first row of the destination column.
 *
 * NOTE: the function starts with assert(0), so it aborts immediately in builds where assertions
 * are enabled.
 */
static void reversedCopyFromInterpolationToDstBuf(SQueryInfo *pQueryInfo, SSqlRes *pRes, tFilePage **pResPages,
                                                  SLocalReducer *pLocalReducer) {
  assert(0);

  size_t numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);
  for (int32_t i = 0; i < numOfExprs; ++i) {
    TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);

    int32_t offset = tscFieldInfoGetOffset(pQueryInfo, i);
    assert(offset == getColumnModelOffset(pLocalReducer->resColModel, i));

    // the destination walks forward while the source walks backward from its last row
    char *dst = pRes->data + pRes->numOfRows * offset;
    char *src = pResPages[i]->data + (pRes->numOfRows - 1) * pField->bytes;

    for (int32_t row = 0; row < pRes->numOfRows; ++row, dst += pField->bytes, src -= pField->bytes) {
      memcpy(dst, src, (size_t)pField->bytes);
    }
  }
}

/*
 * Note: pRes->pLocalReducer may be null, due to the fact that "tscDestroyLocalReducer" is called
 * by "interuptHandler" function in shell
 */
H
Haojun Liao 已提交
886
static void doFillResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool doneOutput) {
H
hjxilinx 已提交
887 888
  SSqlCmd *   pCmd = &pSql->cmd;
  SSqlRes *   pRes = &pSql->res;
889
  
H
hjxilinx 已提交
890 891 892
  tFilePage * pFinalDataPage = pLocalReducer->pResultBuf;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

H
Haojun Liao 已提交
893 894 895 896 897 898 899 900
//  if (pRes->pLocalReducer != pLocalReducer) {
//    /*
//     * Release the SSqlObj is called, and it is int destroying function invoked by other thread.
//     * However, the other thread will WAIT until current process fully completes.
//     * Since the flag of release struct is set by doLocalReduce function
//     */
//    assert(pRes->pLocalReducer == NULL);
//  }
H
hzcheng 已提交
901

H
Haojun Liao 已提交
902
  // no interval query, no fill operation
903
  if (pQueryInfo->intervalTime == 0 || pQueryInfo->fillType == TSDB_FILL_NONE) {
H
hzcheng 已提交
904
    pRes->data = pLocalReducer->pFinalRes;
905
    pRes->numOfRows = pFinalDataPage->num;
H
Haojun Liao 已提交
906
    pRes->numOfClauseTotal += pRes->numOfRows;
H
hzcheng 已提交
907

908 909
    if (pQueryInfo->limit.offset > 0) {
      if (pQueryInfo->limit.offset < pRes->numOfRows) {
910
        int32_t prevSize = pFinalDataPage->num;
911
        tColModelErase(pLocalReducer->resColModel, pFinalDataPage, prevSize, 0, pQueryInfo->limit.offset - 1);
H
hzcheng 已提交
912 913

        /* remove the hole in column model */
S
slguan 已提交
914
        tColModelCompact(pLocalReducer->resColModel, pFinalDataPage, prevSize);
H
hzcheng 已提交
915

916
        pRes->numOfRows -= pQueryInfo->limit.offset;
H
Haojun Liao 已提交
917
        pRes->numOfClauseTotal -= pQueryInfo->limit.offset;
918
        pQueryInfo->limit.offset = 0;
H
hzcheng 已提交
919
      } else {
920
        pQueryInfo->limit.offset -= pRes->numOfRows;
H
hzcheng 已提交
921
        pRes->numOfRows = 0;
H
Haojun Liao 已提交
922
        pRes->numOfClauseTotal = 0;
H
hzcheng 已提交
923 924 925
      }
    }

H
Haojun Liao 已提交
926
    if (pQueryInfo->limit.limit >= 0 && pRes->numOfClauseTotal > pQueryInfo->limit.limit) {
H
hzcheng 已提交
927
      /* impose the limitation of output rows on the final result */
928
      int32_t prevSize = pFinalDataPage->num;
H
Haojun Liao 已提交
929 930
      int32_t overflow = pRes->numOfClauseTotal - pQueryInfo->limit.limit;
      assert(overflow < pRes->numOfRows);
H
hzcheng 已提交
931

H
Haojun Liao 已提交
932
      pRes->numOfClauseTotal = pQueryInfo->limit.limit;
H
Haojun Liao 已提交
933 934
      pRes->numOfRows -= overflow;
      pFinalDataPage->num -= overflow;
H
hzcheng 已提交
935

S
slguan 已提交
936
      tColModelCompact(pLocalReducer->resColModel, pFinalDataPage, prevSize);
H
hzcheng 已提交
937 938

      /* set remain data to be discarded, and reset the interpolation information */
939
      savePrevRecordAndSetupInterpoInfo(pLocalReducer, pQueryInfo, pLocalReducer->pFillInfo);
H
hzcheng 已提交
940 941
    }

H
Haojun Liao 已提交
942
    memcpy(pRes->data, pFinalDataPage->data, pRes->numOfRows * pLocalReducer->finalRowSize);
943
    pFinalDataPage->num = 0;
H
hzcheng 已提交
944 945 946
    return;
  }

947 948
  SFillInfo *pFillInfo = pLocalReducer->pFillInfo;
  int64_t actualETime = MAX(pQueryInfo->window.skey, pQueryInfo->window.ekey);
H
hzcheng 已提交
949

H
hjxilinx 已提交
950 951 952
  tFilePage **pResPages = malloc(POINTER_BYTES * pQueryInfo->fieldsInfo.numOfOutput);
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
    TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
953
    pResPages[i] = calloc(1, sizeof(tFilePage) + pField->bytes * pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
954
  }
H
hjxilinx 已提交
955
  
H
hzcheng 已提交
956
  while (1) {
957
    int64_t newRows = taosGenerateDataBlock(pFillInfo, pResPages, pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
958

959 960
    if (pQueryInfo->limit.offset < newRows) {
      newRows -= pQueryInfo->limit.offset;
H
hzcheng 已提交
961

962
      if (pQueryInfo->limit.offset > 0) {
H
hjxilinx 已提交
963 964
        for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
          TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
965 966
          memmove(pResPages[i]->data, pResPages[i]->data + pField->bytes * pQueryInfo->limit.offset,
                  newRows * pField->bytes);
H
hzcheng 已提交
967 968 969 970 971
        }
      }

      pRes->data = pLocalReducer->pFinalRes;
      pRes->numOfRows = newRows;
H
Haojun Liao 已提交
972
      pRes->numOfClauseTotal += newRows;
H
hzcheng 已提交
973

974
      pQueryInfo->limit.offset = 0;
H
hzcheng 已提交
975 976
      break;
    } else {
977
      pQueryInfo->limit.offset -= newRows;
H
hzcheng 已提交
978 979
      pRes->numOfRows = 0;

980
      int32_t rpoints = taosNumOfRemainRows(pFillInfo);
H
hzcheng 已提交
981
      if (rpoints <= 0) {
982
        if (!doneOutput) { // reduce procedure has not completed yet, but current results for fill are exhausted
H
hzcheng 已提交
983 984 985 986
          break;
        }

        /* all output for current group are completed */
H
Haojun Liao 已提交
987
        int32_t totalRemainRows = getFilledNumOfRes(pFillInfo, actualETime, pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
988 989 990 991 992 993 994 995
        if (totalRemainRows <= 0) {
          break;
        }
      }
    }
  }

  if (pRes->numOfRows > 0) {
H
Haojun Liao 已提交
996
    if (pQueryInfo->limit.limit >= 0 && pRes->numOfClauseTotal > pQueryInfo->limit.limit) {
H
Haojun Liao 已提交
997 998
      int32_t overflow = pRes->numOfClauseTotal - pQueryInfo->limit.limit;
      pRes->numOfRows -= overflow;
H
hzcheng 已提交
999 1000 1001

      assert(pRes->numOfRows >= 0);

H
Haojun Liao 已提交
1002
      pRes->numOfClauseTotal = pQueryInfo->limit.limit;
H
Haojun Liao 已提交
1003
      pFinalDataPage->num -= overflow;
H
hzcheng 已提交
1004 1005

      /* set remain data to be discarded, and reset the interpolation information */
1006
      savePrevRecordAndSetupInterpoInfo(pLocalReducer, pQueryInfo, pFillInfo);
H
hzcheng 已提交
1007 1008
    }

1009
    if (pQueryInfo->order.order == TSDB_ORDER_ASC) {
H
hjxilinx 已提交
1010 1011
      for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
        TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
1012
        int16_t     offset = getColumnModelOffset(pLocalReducer->resColModel, i);
H
hjxilinx 已提交
1013
        memcpy(pRes->data + offset * pRes->numOfRows, pResPages[i]->data, pField->bytes * pRes->numOfRows);
H
hzcheng 已提交
1014
      }
H
hjxilinx 已提交
1015
    } else {  // todo bug??
1016
      reversedCopyFromInterpolationToDstBuf(pQueryInfo, pRes, pResPages, pLocalReducer);
H
hzcheng 已提交
1017 1018 1019
    }
  }

1020
  pFinalDataPage->num = 0;
H
hjxilinx 已提交
1021
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
H
hzcheng 已提交
1022 1023
    tfree(pResPages[i]);
  }
1024
  
H
hzcheng 已提交
1025 1026 1027
  tfree(pResPages);
}

S
slguan 已提交
1028
/*
 * Copy the single row held in tmpBuffer, column by column, into pLocalReducer->prevRowOfInput,
 * then empty tmpBuffer and flag that a previous row is available.
 */
static void savePreviousRow(SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  SColumnModel *pColumnModel = pLocalReducer->pDesc->pColumnModel;
  assert(pColumnModel->capacity == 1 && tmpBuffer->num == 1);

  // with a capacity of one row, the column offset is identical in source and destination
  int32_t col = 0;
  while (col < pColumnModel->numOfCols) {
    SSchema *pColSchema = getColumnModelSchema(pColumnModel, col);
    int16_t  colOffset = getColumnModelOffset(pColumnModel, col);

    memcpy(pLocalReducer->prevRowOfInput + colOffset, tmpBuffer->data + colOffset, pColSchema->bytes);
    ++col;
  }

  tmpBuffer->num = 0;
  pLocalReducer->hasPrevRow = true;
}

H
hjxilinx 已提交
1044
/*
 * Run the secondary merge of every expression on the current input row.
 *
 * First pass: prepare each function context (tag value for tag/ts_dummy functions, the first
 * parameter for top/bottom), mark the stage as SECONDARY_STAGE_MERGE and, if needInit is set,
 * call the init function. Second pass: invoke distSecondaryMergeFunc for every function except
 * tag_dummy and ts_dummy.
 */
static void doExecuteSecondaryMerge(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, bool needInit) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t      numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  // the tag columns need to be set before all functions execution
  for (int32_t j = 0; j < numOfExprs; ++j) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[j];
    int32_t         functionId = pCtx->functionId;

    bool takesTagFromInput =
        (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TS_DUMMY);

    if (takesTagFromInput) {
      // tags/tags_dummy function, the tag field of SQLFunctionCtx is from the input buffer
      tVariantDestroy(&pCtx->tag);
      char *input = pCtx->aInputElemBuf;

      bool isVarLenType = (pCtx->inputType == TSDB_DATA_TYPE_BINARY || pCtx->inputType == TSDB_DATA_TYPE_NCHAR);
      if (!isVarLenType) {
        tVariantCreateFromBinary(&pCtx->tag, input, pCtx->inputBytes, pCtx->inputType);
      } else {
        assert(varDataLen(input) <= pCtx->inputBytes);
        tVariantCreateFromBinary(&pCtx->tag, varDataVal(input), varDataLen(input), pCtx->inputType);
      }
    } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, j);
      pCtx->param[0].i64Key = pExpr->param[0].i64Key;
    }

    pCtx->currentStage = SECONDARY_STAGE_MERGE;

    if (needInit) {
      aAggs[pCtx->functionId].init(pCtx);
    }
  }

  for (int32_t j = 0; j < numOfExprs; ++j) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[j];
    int32_t         functionId = pCtx->functionId;

    // the dummy functions take no part in the merge
    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TS_DUMMY) {
      aAggs[functionId].distSecondaryMergeFunc(pCtx);
    }
  }
}
H
hzcheng 已提交
1085

H
hjxilinx 已提交
1086
/*
 * If a row is still pending from a previous round, clear the pending flag, merge the row with
 * freshly initialized function contexts and remember it as the previous row.
 */
static void handleUnprocessedRow(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  if (!pLocalReducer->hasUnprocessedRow) {
    return;
  }

  pLocalReducer->hasUnprocessedRow = false;

  doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
  savePreviousRow(pLocalReducer, tmpBuffer);
}

1094
/*
 * Return the largest number of results reported by the function contexts in pCtx.
 * The ts, tag and tagprj functions are ignored, since they can not decide the number of output
 * rows of the query; that number is decided by the main output functions.
 */
static int64_t getNumOfResultLocal(SQueryInfo *pQueryInfo, SQLFunctionCtx *pCtx) {
  int64_t maxOutput = 0;
  size_t  numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t j = 0; j < numOfExprs; ++j) {
    int32_t functionId = pCtx[j].functionId;

    bool decidesOutput =
        !(functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (decidesOutput) {
      SResultInfo *pResInfo = GET_RES_INFO(&pCtx[j]);
      if (maxOutput < pResInfo->numOfRes) {
        maxOutput = pResInfo->numOfRes;
      }
    }
  }

  return maxOutput;
}

/*
 * When handling a top/bottom query, which produces more than one row of results,
 * the tsdb_func_tags function only fills the first row of the results. The remaining rows
 * need to be filled with the same value, i.e. the tags specified in the group by clause.
 */
H
hjxilinx 已提交
1123
/*
 * Replicate the single row produced by every TSDB_FUNC_TAG function into the following
 * (numOfRes - 1) rows of its output buffer. As before, pCtx->aOutputBuf of each tag function is
 * advanced by one row per replicated row.
 *
 * The value is copied straight from the first output row. No scratch buffer is allocated, so
 * the function can neither dereference a failed allocation nor write beyond a buffer that was
 * sized from a different length (pExpr->resBytes) than the one being copied (pCtx->outputBytes).
 *
 * @param pQueryInfo     query info, used for the number of expressions
 * @param numOfRes       total number of result rows of the current group
 * @param pLocalReducer  local reducer owning the function contexts
 */
static void fillMultiRowsOfTagsVal(SQueryInfo *pQueryInfo, int32_t numOfRes, SLocalReducer *pLocalReducer) {
  size_t  size = tscSqlExprNumOfExprs(pQueryInfo);
  int32_t inc = numOfRes - 1;  // tsdb_func_tag function only produce one row of result

  for (int32_t k = 0; k < size; ++k) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[k];
    if (pCtx->functionId != TSDB_FUNC_TAG) {
      continue;
    }

    // the destination rows start one full row behind the source row, so the regions never overlap
    const char *firstRow = pCtx->aOutputBuf;

    for (int32_t i = 0; i < inc; ++i) {
      pCtx->aOutputBuf += pCtx->outputBytes;
      memcpy(pCtx->aOutputBuf, firstRow, (size_t)pCtx->outputBytes);
    }
  }
}

H
hjxilinx 已提交
1156
/*
 * Finalize every function context of the current group, add the number of produced rows to the
 * result page and replicate the tag values over all produced rows.
 *
 * @return the number of result rows produced for the group
 */
int32_t finalizeRes(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer) {
  size_t numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  int32_t k = 0;
  while (k < numOfExprs) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[k];
    aAggs[pCtx->functionId].xFinalize(pCtx);
    ++k;
  }

  // the group is closed, there is no previous row to compare with any more
  pLocalReducer->hasPrevRow = false;

  int32_t numOfRes = (int32_t)getNumOfResultLocal(pQueryInfo, pLocalReducer->pCtx);
  pLocalReducer->pResultBuf->num += numOfRes;

  fillMultiRowsOfTagsVal(pQueryInfo, numOfRes, pLocalReducer);
  return numOfRes;
}

/*
 * points merge:
 * points are merged according to the sort info, which is tags columns and timestamp column.
 * In case of points without either tags columns or timestamp, such as
 * results generated by simple aggregation function, we merge them all into one points
 * *Exception*: column projection query, required no merge procedure
 */
H
hjxilinx 已提交
1180
/*
 * Decide whether the row in tmpBuffer has to be merged with the previous input row.
 *
 * - arithmetic expressions and projection queries on a super table are never merged;
 * - without any order column, all rows are merged into one result;
 * - otherwise the rows are merged only if the comparator (ascending or descending, depending on
 *   pDesc->tsOrder) reports them as equal, i.e. they belong to the same group.
 */
bool needToMerge(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  int16_t functionId = pLocalReducer->pCtx[0].functionId;

  // todo opt performance
  if (functionId == TSDB_FUNC_ARITHM || tscIsProjectionQueryOnSTable(pQueryInfo, 0)) {
    return false;  // column projection query: merge procedure is disabled
  }

  tOrderDescriptor *pDesc = pLocalReducer->pDesc;
  if (pDesc->orderInfo.numOfCols <= 0) {
    return true;  // merge all result by default
  }

  // todo refactor comparator
  int32_t ret = (pDesc->tsOrder == TSDB_ORDER_ASC)
                    ? compare_a(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data)
                    : compare_d(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data);

  /* if ret == 0, means the result belongs to the same group */
  return ret == 0;
}

H
hjxilinx 已提交
1204
/*
 * Check whether the number of returned groups has reached the slimit of the query.
 * A negative slimit.limit never reports the limit as reached.
 */
static bool reachGroupResultLimit(SQueryInfo *pQueryInfo, SSqlRes *pRes) {
  if (pQueryInfo->slimit.limit < 0) {
    return false;
  }

  return pRes->numOfGroups >= pQueryInfo->slimit.limit;
}

/*
 * Count one more finished group in pRes->numOfGroups.
 *
 * @return true if the number of output groups has reached the slimit clause, false otherwise
 */
static bool saveGroupResultInfo(SSqlObj *pSql) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  pRes->numOfGroups += 1;

  // the output group is limited by the slimit clause
  return reachGroupResultLimit(pQueryInfo, pRes);
}

S
slguan 已提交
1227 1228 1229 1230 1231 1232 1233 1234
/**
 *
 * @param pSql
 * @param pLocalReducer
 * @param noMoreCurrentGroupRes
 * @return if current group is skipped, return false, and do NOT record it into pRes->numOfGroups
 */
/*
 * Produce the final result of the current group.
 *
 * While slimit.offset is positive the group is skipped: the offset is decreased by one, no row
 * is returned, and the remaining rows of the group are discarded unless the group has already
 * been completed. Otherwise the result page is compacted, handed to the fill descriptor (if
 * there is one) and passed to doFillResult.
 *
 * @return false if the current group is skipped, true otherwise
 */
bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool noMoreCurrentGroupRes) {
  SSqlCmd *   pCmd = &pSql->cmd;
  SSqlRes *   pRes = &pSql->res;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  pRes->code = TSDB_CODE_SUCCESS;

  // the group is skipped by the user: return no rows and drop what may still follow
  if (pQueryInfo->slimit.offset > 0) {
    pQueryInfo->slimit.offset -= 1;
    pRes->numOfRows = 0;
    pLocalReducer->discard = !noMoreCurrentGroupRes;

    return false;
  }

  tFilePage *   pResBuf = pLocalReducer->pResultBuf;
  SColumnModel *pModel = pLocalReducer->resColModel;
  tColModelCompact(pModel, pResBuf, pModel->capacity);

#ifdef _DEBUG_VIEW
  printf("final result before interpo:\n");
#endif

  SFillInfo *pFillInfo = pLocalReducer->pFillInfo;
  if (pFillInfo != NULL) {
    taosFillSetStartInfo(pFillInfo, pResBuf->num, pQueryInfo->window.ekey);
    taosFillCopyInputDataFromOneFilePage(pFillInfo, pResBuf);
  }

  doFillResult(pSql, pLocalReducer, noMoreCurrentGroupRes);
  return true;
}

H
hjxilinx 已提交
1273
/*
 * Reset the output buffer to the beginning: the result page (header and data area) is zeroed
 * and the output pointer of every function context is set back to the start of its column
 * inside the page.
 */
void resetOutputBuf(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer) {
  memset(pLocalReducer->pResultBuf, 0, pLocalReducer->nResultBufSize + sizeof(tFilePage));

  int32_t i = 0;
  while (i < pQueryInfo->fieldsInfo.numOfOutput) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[i];
    pCtx->aOutputBuf =
        pLocalReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pLocalReducer->resColModel->capacity;
    ++i;
  }
}

S
slguan 已提交
1282
/*
 * Prepare the result object and the query info for the rows of a new group: the row counters
 * are cleared, limit.offset is restored from the value kept in the local reducer and, for a
 * fill query, the fill descriptor is reset to the start timestamp of the query window.
 */
static void resetEnvForNewResultset(SSqlRes *pRes, SSqlCmd *pCmd, SLocalReducer *pLocalReducer) {
  pRes->numOfRows = 0;
  pRes->numOfClauseTotal = 0;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  pQueryInfo->limit.offset = pLocalReducer->offset;

  STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0);
  STableComInfo   tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);
  int8_t          precision = tinfo.precision;

  // only a fill query keeps interpolation state that has to be reset for the new group
  if (pQueryInfo->fillType == TSDB_FILL_NONE) {
    return;
  }

  TSKEY skey = MIN(pQueryInfo->window.skey, pQueryInfo->window.ekey);

  // NOTE(review): savePrevRecordAndSetupInterpoInfo passes slidingTime as the second argument
  // of this call, while intervalTime is passed here - confirm which one is intended.
  int64_t newTime =
      taosGetIntervalStartTimestamp(skey, pQueryInfo->intervalTime, pQueryInfo->slidingTimeUnit, precision);
  taosResetFillInfo(pLocalReducer->pFillInfo, newTime);
}

S
slguan 已提交
1307 1308 1309 1310
/* All input sources are done once the number of completed sources equals the number of buffers. */
static bool isAllSourcesCompleted(SLocalReducer *pLocalReducer) {
  return pLocalReducer->numOfCompleted == pLocalReducer->numOfBuffer;
}

1311
/*
 * Continue to generate filled rows for the current group if the fill descriptor still holds
 * remaining rows.
 *
 * @return true if the fill descriptor had remaining rows (whether or not new rows were
 *         produced), false if there is no fill descriptor or nothing remains in it
 */
static bool doBuildFilledResultForGroup(SSqlObj *pSql) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SFillInfo *    pFillInfo = pLocalReducer->pFillInfo;

  if (pFillInfo == NULL || taosNumOfRemainRows(pFillInfo) <= 0) {
    return false;
  }

  assert(pQueryInfo->fillType != TSDB_FILL_NONE);

  // the first column must be the timestamp column: take the timestamp of the last input row
  tFilePage *pFinalDataBuf = pLocalReducer->pResultBuf;
  int64_t    etime = *(int64_t *)(pFinalDataBuf->data + TSDB_KEYSIZE * (pFillInfo->numOfRows - 1));

  int32_t rows = getFilledNumOfRes(pFillInfo, etime, pLocalReducer->resColModel->capacity);
  if (rows > 0) {  // do fill gap
    doFillResult(pSql, pLocalReducer, false);
  }

  return true;
}
H
hzcheng 已提交
1336

S
slguan 已提交
1337 1338 1339 1340
/*
 * Handle the situation in which no further input row has to be merged for the current group:
 * all sources are completed without a pending previous row, there is no first data source, or
 * the previous group has been completed while a row is still unprocessed.
 *
 * For a fill query the remaining filled rows up to the end of the query window are generated.
 *
 * @return true if the caller can return to the client right away (rows were produced, the query
 *         is completed, or the group limit is reached); false if the merge has to go on
 */
static bool doHandleLastRemainData(SSqlObj *pSql) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SFillInfo *    pFillInfo = pLocalReducer->pFillInfo;

  bool prevGroupCompleted = (!pLocalReducer->discard) && pLocalReducer->hasUnprocessedRow;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  bool nothingToMerge = (isAllSourcesCompleted(pLocalReducer) && !pLocalReducer->hasPrevRow) ||
                        pLocalReducer->pLocalDataSrc[0] == NULL || prevGroupCompleted;
  if (!nothingToMerge) {
    return false;
  }

  // if fillType == TSDB_FILL_NONE, there is nothing to generate
  if (pQueryInfo->fillType != TSDB_FILL_NONE) {
    // the larger boundary of the query window
    int64_t etime = pQueryInfo->window.skey;
    if (pQueryInfo->window.skey < pQueryInfo->window.ekey) {
      etime = pQueryInfo->window.ekey;
    }

    assert(pFillInfo->numOfRows == 0);
    int32_t rows = getFilledNumOfRes(pFillInfo, etime, pLocalReducer->resColModel->capacity);
    if (rows > 0) {  // do interpo
      doFillResult(pSql, pLocalReducer, true);
    }
  }

  /*
   * 1. numOfRows == 0, means no interpolation results are generated.
   * 2. if all local data sources are consumed, and no un-processed rows exist.
   *
   * No results will be generated and query completed.
   */
  if (pRes->numOfRows > 0 || (isAllSourcesCompleted(pLocalReducer) && (!pLocalReducer->hasUnprocessedRow))) {
    return true;
  }

  // start to process result for a new group and save the result info of previous group
  if (saveGroupResultInfo(pSql)) {
    return true;
  }

  resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
  return false;
}
H
hzcheng 已提交
1381

H
hjxilinx 已提交
1382 1383 1384 1385
/*
 * Move the output position of every function context forward by numOfRes rows (including the
 * timestamp output position of the top/bottom functions), then merge the current input row
 * with freshly initialized function contexts.
 */
static void doProcessResultInNextWindow(SSqlObj *pSql, int32_t numOfRes) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t         numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  int32_t k = 0;
  while (k < numOfExprs) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[k];
    pCtx->aOutputBuf += pCtx->outputBytes * numOfRes;

    // set the correct output timestamp column position
    bool isTopBottom = (pCtx->functionId == TSDB_FUNC_TOP || pCtx->functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf = ((char *)pCtx->ptsOutputBuf + TSDB_KEYSIZE * numOfRes);
    }

    ++k;
  }

  doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
}

1403
int32_t tscDoLocalMerge(SSqlObj *pSql) {
S
slguan 已提交
1404 1405
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;
H
hjxilinx 已提交
1406

H
hjxilinx 已提交
1407
  tscResetForNextRetrieve(pRes);
H
hjxilinx 已提交
1408

S
slguan 已提交
1409
  if (pSql->signature != pSql || pRes == NULL || pRes->pLocalReducer == NULL) {  // all data has been processed
H
hjxilinx 已提交
1410
    tscTrace("%p %s call the drop local reducer", pSql, __FUNCTION__);
S
slguan 已提交
1411
    tscDestroyLocalReducer(pSql);
H
hzcheng 已提交
1412 1413
    return 0;
  }
H
hjxilinx 已提交
1414

S
slguan 已提交
1415
  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
H
hjxilinx 已提交
1416 1417
  SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

S
slguan 已提交
1418 1419
  // set the data merge in progress
  int32_t prevStatus =
weixin_48148422's avatar
weixin_48148422 已提交
1420
      atomic_val_compare_exchange_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY, TSC_LOCALREDUCE_IN_PROGRESS);
H
hjxilinx 已提交
1421
  if (prevStatus != TSC_LOCALREDUCE_READY) {
H
hjxilinx 已提交
1422
    assert(prevStatus == TSC_LOCALREDUCE_TOBE_FREED);  // it is in tscDestroyLocalReducer function already
S
slguan 已提交
1423 1424 1425 1426 1427 1428 1429 1430 1431 1432
    return TSDB_CODE_SUCCESS;
  }

  tFilePage *tmpBuffer = pLocalReducer->pTempBuffer;

  if (doHandleLastRemainData(pSql)) {
    pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
    return TSDB_CODE_SUCCESS;
  }

1433
  if (doBuildFilledResultForGroup(pSql)) {
S
slguan 已提交
1434 1435 1436 1437
    pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
    return TSDB_CODE_SUCCESS;
  }

H
hzcheng 已提交
1438 1439 1440
  SLoserTreeInfo *pTree = pLocalReducer->pLoserTree;

  // clear buffer
S
slguan 已提交
1441
  handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hjxilinx 已提交
1442
  SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel;
H
hzcheng 已提交
1443 1444

  while (1) {
S
slguan 已提交
1445
    if (isAllSourcesCompleted(pLocalReducer)) {
H
hzcheng 已提交
1446 1447 1448 1449 1450 1451
      break;
    }

#ifdef _DEBUG_VIEW
    printf("chosen data in pTree[0] = %d\n", pTree->pNode[0].index);
#endif
1452
    assert((pTree->pNode[0].index < pLocalReducer->numOfBuffer) && (pTree->pNode[0].index >= 0) && tmpBuffer->num == 0);
H
hzcheng 已提交
1453 1454

    // chosen from loser tree
S
slguan 已提交
1455
    SLocalDataSource *pOneDataSrc = pLocalReducer->pLocalDataSrc[pTree->pNode[0].index];
H
hzcheng 已提交
1456

S
slguan 已提交
1457
    tColModelAppend(pModel, tmpBuffer, pOneDataSrc->filePage.data, pOneDataSrc->rowIdx, 1,
H
hjxilinx 已提交
1458
                    pOneDataSrc->pMemBuffer->pColumnModel->capacity);
H
hzcheng 已提交
1459 1460 1461 1462

#if defined(_DEBUG_VIEW)
    printf("chosen row:\t");
    SSrcColumnInfo colInfo[256] = {0};
1463
    tscGetSrcColumnInfo(colInfo, pQueryInfo);
H
hzcheng 已提交
1464

1465
    tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->num, pModel->capacity, colInfo);
H
hzcheng 已提交
1466
#endif
S
slguan 已提交
1467

H
hzcheng 已提交
1468 1469 1470 1471
    if (pLocalReducer->discard) {
      assert(pLocalReducer->hasUnprocessedRow == false);

      /* current record belongs to the same group of previous record, need to discard it */
S
slguan 已提交
1472
      if (isSameGroup(pCmd, pLocalReducer, pLocalReducer->discardData->data, tmpBuffer)) {
1473
        tmpBuffer->num = 0;
H
hzcheng 已提交
1474 1475
        pOneDataSrc->rowIdx += 1;

S
slguan 已提交
1476 1477 1478 1479
        adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);

        // all inputs are exhausted, abort current process
        if (isAllSourcesCompleted(pLocalReducer)) {
H
hzcheng 已提交
1480 1481 1482
          break;
        }

S
slguan 已提交
1483
        // data belongs to the same group needs to be discarded
H
hzcheng 已提交
1484 1485 1486
        continue;
      } else {
        pLocalReducer->discard = false;
1487
        pLocalReducer->discardData->num = 0;
H
hzcheng 已提交
1488

S
slguan 已提交
1489 1490 1491 1492 1493 1494
        if (saveGroupResultInfo(pSql)) {
          pLocalReducer->status = TSC_LOCALREDUCE_READY;
          return TSDB_CODE_SUCCESS;
        }

        resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
H
hzcheng 已提交
1495 1496 1497 1498
      }
    }

    if (pLocalReducer->hasPrevRow) {
1499
      if (needToMerge(pQueryInfo, pLocalReducer, tmpBuffer)) {
S
slguan 已提交
1500
        // belong to the group of the previous row, continue process it
S
slguan 已提交
1501
        doExecuteSecondaryMerge(pCmd, pLocalReducer, false);
H
hzcheng 已提交
1502 1503

        // copy to buffer
S
slguan 已提交
1504 1505 1506 1507 1508 1509
        savePreviousRow(pLocalReducer, tmpBuffer);
      } else {
        /*
         * current row does not belong to the group of previous row.
         * so the processing of previous group is completed.
         */
1510
        int32_t numOfRes = finalizeRes(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1511

S
slguan 已提交
1512
        bool       sameGroup = isSameGroup(pCmd, pLocalReducer, pLocalReducer->prevRowOfInput, tmpBuffer);
H
hzcheng 已提交
1513 1514 1515
        tFilePage *pResBuf = pLocalReducer->pResultBuf;

        /*
1516
         * if the previous group does NOT generate any result (pResBuf->num == 0),
H
hzcheng 已提交
1517 1518
         * continue to process results instead of return results.
         */
1519
        if ((!sameGroup && pResBuf->num > 0) || (pResBuf->num == pLocalReducer->resColModel->capacity)) {
H
hzcheng 已提交
1520
          // does not belong to the same group
S
slguan 已提交
1521
          bool notSkipped = doGenerateFinalResults(pSql, pLocalReducer, !sameGroup);
H
hzcheng 已提交
1522

S
slguan 已提交
1523
          // this row needs to be discarded, since it belongs to the previous group
H
hzcheng 已提交
1524 1525
          if (pLocalReducer->discard && sameGroup) {
            pLocalReducer->hasUnprocessedRow = false;
1526
            tmpBuffer->num = 0;
H
hzcheng 已提交
1527
          } else {
S
slguan 已提交
1528
            // current row does not belong to the previous group, so it has not been handled yet.
H
hzcheng 已提交
1529 1530 1531
            pLocalReducer->hasUnprocessedRow = true;
          }

1532
          resetOutputBuf(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1533 1534
          pOneDataSrc->rowIdx += 1;

S
slguan 已提交
1535 1536
          // here we do not check the return value
          adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);
H
hzcheng 已提交
1537 1538 1539
          assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS);

          if (pRes->numOfRows == 0) {
S
slguan 已提交
1540
            handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1541 1542

            if (!sameGroup) {
S
slguan 已提交
1543 1544 1545 1546 1547 1548 1549 1550 1551 1552
              /*
               * previous group is done, prepare for the next group
               * If previous group is not skipped, keep it in pRes->numOfGroups
               */
              if (notSkipped && saveGroupResultInfo(pSql)) {
                pLocalReducer->status = TSC_LOCALREDUCE_READY;
                return TSDB_CODE_SUCCESS;
              }

              resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
H
hzcheng 已提交
1553 1554 1555 1556 1557 1558 1559
            }
          } else {
            /*
             * if next record belongs to a new group, we do not handle this record here.
             * We start the process in a new round.
             */
            if (sameGroup) {
S
slguan 已提交
1560
              handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1561 1562 1563
            }
          }

S
slguan 已提交
1564 1565 1566 1567 1568 1569
          // current group has no result,
          if (pRes->numOfRows == 0) {
            continue;
          } else {
            pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
            return TSDB_CODE_SUCCESS;
H
hzcheng 已提交
1570
          }
S
slguan 已提交
1571
        } else {  // result buffer is not full
H
hjxilinx 已提交
1572
          doProcessResultInNextWindow(pSql, numOfRes);
S
slguan 已提交
1573
          savePreviousRow(pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1574 1575
        }
      }
S
slguan 已提交
1576
    } else {
S
slguan 已提交
1577
      doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
S
slguan 已提交
1578
      savePreviousRow(pLocalReducer, tmpBuffer);  // copy the processed row to buffer
H
hzcheng 已提交
1579 1580 1581
    }

    pOneDataSrc->rowIdx += 1;
S
slguan 已提交
1582
    adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);
H
hzcheng 已提交
1583 1584 1585
  }

  if (pLocalReducer->hasPrevRow) {
1586
    finalizeRes(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1587 1588
  }

1589
  if (pLocalReducer->pResultBuf->num) {
H
hzcheng 已提交
1590 1591 1592 1593
    doGenerateFinalResults(pSql, pLocalReducer, true);
  }

  assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS && pRes->row == 0);
S
slguan 已提交
1594
  pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
H
hzcheng 已提交
1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612

  return TSDB_CODE_SUCCESS;
}

/*
 * Initialize the result object of pObj for a query whose result is produced locally.
 *
 * If a local reducer is already attached to the result, tscDestroyLocalReducer() is
 * called on pObj first. A zero-initialized SLocalReducer is then allocated together
 * with a zero-initialized result buffer large enough for numOfRes rows of rowLen
 * bytes each (plus the tFilePage header and one spare byte), and pRes->data is set
 * to the payload of that buffer.
 *
 * @param pObj     sql object whose result (pObj->res) is initialized
 * @param numOfRes number of result rows; also stored in pResultBuf->num
 * @param rowLen   length of one result row in bytes
 *
 * On failure (negative sizes, size overflow, or out of memory) no partially built
 * reducer is left behind: both pRes->pLocalReducer and pRes->data are NULL when the
 * function returns, which is how callers can detect the failure.
 */
void tscInitResObjForLocalQuery(SSqlObj *pObj, int32_t numOfRes, int32_t rowLen) {
  SSqlRes *pRes = &pObj->res;
  if (pRes->pLocalReducer != NULL) {
    tscDestroyLocalReducer(pObj);
  }

  pRes->qhandle = 1;  // hack to pass the safety check in fetch_row function
  pRes->numOfRows = 0;
  pRes->row = 0;

  pRes->rspType = 0;  // used as a flag to denote if taos_retrieved() has been called yet

  // no result buffer is available until both allocations below have succeeded
  pRes->data = NULL;

  pRes->pLocalReducer = (SLocalReducer *)calloc(1, sizeof(SLocalReducer));
  if (pRes->pLocalReducer == NULL) {
    return;
  }

  /*
   * we need one additional byte space
   * the sprintf function needs one additional space to put '\0' at the end of string
   */
  const size_t extraSize = sizeof(tFilePage) + 1;

  // reject negative sizes and any row count whose payload would not fit in size_t
  bool validSize = (numOfRes >= 0) && (rowLen >= 0);
  if (validSize && rowLen > 0 && (size_t)numOfRes > (SIZE_MAX - extraSize) / (size_t)rowLen) {
    validSize = false;
  }

  tFilePage *pResultBuf = NULL;
  if (validSize) {
    // multiply in size_t: the product of two int32_t values may overflow int32_t
    size_t allocSize = (size_t)numOfRes * (size_t)rowLen + extraSize;
    pResultBuf = (tFilePage *)calloc(1, allocSize);
  }

  if (pResultBuf == NULL) {
    // roll back the reducer so that the result object is not left half-initialized
    free(pRes->pLocalReducer);
    pRes->pLocalReducer = NULL;
    return;
  }

  pResultBuf->num = numOfRes;

  pRes->pLocalReducer->pResultBuf = pResultBuf;
  pRes->data = pResultBuf->data;
}