tscLocalMerge.c 55.3 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

16
#include "os.h"
S
slguan 已提交
17
#include "tlosertree.h"
H
hzcheng 已提交
18
#include "tscUtil.h"
H
hjxilinx 已提交
19
#include "tschemautil.h"
S
slguan 已提交
20
#include "tsclient.h"
H
hzcheng 已提交
21
#include "tutil.h"
S
slguan 已提交
22
#include "tscLog.h"
23
#include "tscLocalMerge.h"
H
hzcheng 已提交
24 25

typedef struct SCompareParam {
S
slguan 已提交
26 27
  SLocalDataSource **pLocalData;
  tOrderDescriptor * pDesc;
28
  int32_t            num;
S
slguan 已提交
29
  int32_t            groupOrderType;
H
hzcheng 已提交
30 31 32 33 34 35
} SCompareParam;

int32_t treeComparator(const void *pLeft, const void *pRight, void *param) {
  int32_t pLeftIdx = *(int32_t *)pLeft;
  int32_t pRightIdx = *(int32_t *)pRight;

S
slguan 已提交
36 37 38
  SCompareParam *    pParam = (SCompareParam *)param;
  tOrderDescriptor * pDesc = pParam->pDesc;
  SLocalDataSource **pLocalData = pParam->pLocalData;
H
hzcheng 已提交
39 40 41 42 43 44 45 46 47 48

  /* this input is exhausted, set the special value to denote this */
  if (pLocalData[pLeftIdx]->rowIdx == -1) {
    return 1;
  }

  if (pLocalData[pRightIdx]->rowIdx == -1) {
    return -1;
  }

49
  if (pParam->groupOrderType == TSDB_ORDER_DESC) {  // desc
50 51
    return compare_d(pDesc, pParam->num, pLocalData[pLeftIdx]->rowIdx, pLocalData[pLeftIdx]->filePage.data,
                     pParam->num, pLocalData[pRightIdx]->rowIdx, pLocalData[pRightIdx]->filePage.data);
H
hzcheng 已提交
52
  } else {
53 54
    return compare_a(pDesc, pParam->num, pLocalData[pLeftIdx]->rowIdx, pLocalData[pLeftIdx]->filePage.data,
                     pParam->num, pLocalData[pRightIdx]->rowIdx, pLocalData[pRightIdx]->filePage.data);
H
hzcheng 已提交
55 56 57
  }
}

H
hjLiao 已提交
58
/*
 * Initialise pReducer->pCtx[] and pReducer->pResInfo[] (one entry per expression of the
 * current clause) for the SECONDARY_STAGE_MERGE stage.
 *
 * The input side of every context (type, bytes, position inside pReducer->pTempBuffer) is
 * read from pDesc->pColumnModel, while the output side (type, bytes, position inside
 * pReducer->pResultBuf) is read from the expression list of pCmd, because the two layouts
 * may differ.
 *
 * Afterwards the contexts of all TSDB_FUNC_TAG_DUMMY / TSDB_FUNC_TS_DUMMY columns are
 * collected and attached to the last expression flagged TSDB_FUNCSTATE_SELECTIVITY; if
 * either side is missing, the collected list is discarded.
 *
 * NOTE(review): the allocation of interResultBuf is not checked here; the function has no
 * way to report a failure to its caller.
 */
static void tscInitSqlContext(SSqlCmd *pCmd, SLocalReducer *pReducer, tOrderDescriptor *pDesc) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t      size = tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t i = 0; i < size; ++i) {
    SQLFunctionCtx *pCtx = &pReducer->pCtx[i];
    SSqlExpr *      pExpr = tscSqlExprGet(pQueryInfo, i);

    pCtx->aOutputBuf = pReducer->pResultBuf->data + pExpr->offset * pReducer->resColModel->capacity;
    pCtx->order = pQueryInfo->order.order;
    pCtx->functionId = pExpr->functionId;

    // the input buffer holds only one row of data
    int16_t  offset = getColumnModelOffset(pDesc->pColumnModel, i);
    SSchema *pSchema = getColumnModelSchema(pDesc->pColumnModel, i);

    pCtx->aInputElemBuf = pReducer->pTempBuffer->data + offset;

    // the input data format comes from the column model of the order descriptor
    pCtx->inputType = pSchema->type;
    pCtx->inputBytes = pSchema->bytes;

    // the output data format comes from the expression in pCmd
    pCtx->outputBytes = pExpr->resBytes;
    pCtx->outputType = pExpr->resType;

    pCtx->startOffset = 0;
    pCtx->size = 1;
    pCtx->hasNull = true;
    pCtx->currentStage = SECONDARY_STAGE_MERGE;

    // for top/bottom function, the output of timestamp is the first column
    int32_t functionId = pExpr->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pReducer->pCtx[0].aOutputBuf;
      pCtx->param[2].i64Key = pQueryInfo->order.order;
      pCtx->param[2].nType  = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[1].i64Key = pQueryInfo->order.orderColId;
    }

    SResultInfo *pResInfo = &pReducer->pResInfo[i];
    pResInfo->bufLen = pExpr->interBytes;
    pResInfo->interResultBuf = calloc(1, (size_t) pResInfo->bufLen);

    pCtx->resultInfo = &pReducer->pResInfo[i];
    pCtx->resultInfo->superTableQ = true;
  }

  int16_t          n = 0;
  int16_t          tagLen = 0;
  SQLFunctionCtx **pTagCtx = calloc(pQueryInfo->fieldsInfo.numOfOutput, POINTER_BYTES);
  if (pTagCtx == NULL) {
    // the list is written through below, so it must not be used when the allocation failed
    if (pQueryInfo->fieldsInfo.numOfOutput > 0) {
      tscError("failed to allocate the tag context list");
    }
    return;
  }

  SQLFunctionCtx *pCtx = NULL;
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
    if (pExpr->functionId == TSDB_FUNC_TAG_DUMMY || pExpr->functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pExpr->resBytes;
      pTagCtx[n++] = &pReducer->pCtx[i];
    } else if ((aAggs[pExpr->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCtx = &pReducer->pCtx[i];
    }
  }

  if (n == 0 || pCtx == NULL) {
    free(pTagCtx);
  } else {
    // ownership of the list moves to the selectivity context; tscDestroyLocalReducer() frees it
    pCtx->tagInfo.pTagCtxList = pTagCtx;
    pCtx->tagInfo.numOfTagCols = n;
    pCtx->tagInfo.tagsLen = tagLen;
  }
}

134
/*
 * Build the per-column fill description for the expressions of pQueryInfo.
 *
 * Each entry copies the result bytes/type, column id, flag and function id of the matching
 * expression, takes its fill value from pQueryInfo->fillVal[i], and records the running byte
 * offset of the column (the sum of resBytes of all preceding expressions).
 *
 * Returns a calloc'ed array with one entry per expression, or NULL when the allocation
 * fails. The caller owns the returned array.
 */
static SFillColInfo* createFillColInfo(SQueryInfo* pQueryInfo) {
  int32_t numOfCols = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t offset = 0;
  for (int32_t i = 0; i < numOfCols; ++i) {
    SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i);

    pFillCol[i].col.bytes  = pExpr->resBytes;
    pFillCol[i].col.type   = (int8_t)pExpr->resType;
    pFillCol[i].col.colId  = pExpr->colInfo.colId;
    pFillCol[i].flag       = pExpr->colInfo.flag;
    pFillCol[i].col.offset = offset;
    pFillCol[i].functionId = pExpr->functionId;
    pFillCol[i].fillVal.i  = pQueryInfo->fillVal[i];

    offset += pExpr->resBytes;
  }

  return pFillCol;
}

H
hzcheng 已提交
155
void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc,
H
hjLiao 已提交
156 157 158 159
                           SColumnModel *finalmodel, SSqlObj* pSql) {
  SSqlCmd* pCmd = &pSql->cmd;
  SSqlRes* pRes = &pSql->res;
  
160
  if (pMemBuffer == NULL) {
H
hjLiao 已提交
161 162
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
    tscError("%p pMemBuffer is NULL", pMemBuffer);
163
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
164 165 166 167
    return;
  }
 
  if (pDesc->pColumnModel == NULL) {
H
hzcheng 已提交
168
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
H
hjLiao 已提交
169
    tscError("%p no local buffer or intermediate result format model", pSql);
170
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
H
hzcheng 已提交
171 172 173 174 175 176 177
    return;
  }

  int32_t numOfFlush = 0;
  for (int32_t i = 0; i < numOfBuffer; ++i) {
    int32_t len = pMemBuffer[i]->fileMeta.flushoutData.nLength;
    if (len == 0) {
178
      tscDebug("%p no data retrieved from orderOfVnode:%d", pSql, i + 1);
H
hzcheng 已提交
179 180 181 182 183 184 185 186
      continue;
    }

    numOfFlush += len;
  }

  if (numOfFlush == 0 || numOfBuffer == 0) {
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
187
    tscDebug("%p retrieved no data", pSql);
H
hzcheng 已提交
188 189 190
    return;
  }

H
hjxilinx 已提交
191
  if (pDesc->pColumnModel->capacity >= pMemBuffer[0]->pageSize) {
H
hjLiao 已提交
192
    tscError("%p Invalid value of buffer capacity %d and page size %d ", pSql, pDesc->pColumnModel->capacity,
H
hjxilinx 已提交
193
             pMemBuffer[0]->pageSize);
S
slguan 已提交
194 195

    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
196
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
H
hzcheng 已提交
197 198 199
    return;
  }

H
hjLiao 已提交
200 201 202
  size_t size = sizeof(SLocalReducer) + POINTER_BYTES * numOfFlush;
  
  SLocalReducer *pReducer = (SLocalReducer *) calloc(1, size);
H
hzcheng 已提交
203
  if (pReducer == NULL) {
H
hjLiao 已提交
204
    tscError("%p failed to create local merge structure, out of memory", pSql);
S
slguan 已提交
205 206

    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
207
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
208 209 210 211
    return;
  }

  pReducer->pExtMemBuffer = pMemBuffer;
S
slguan 已提交
212
  pReducer->pLocalDataSrc = (SLocalDataSource **)&pReducer[1];
H
hzcheng 已提交
213 214 215 216
  assert(pReducer->pLocalDataSrc != NULL);

  pReducer->numOfBuffer = numOfFlush;
  pReducer->numOfVnode = numOfBuffer;
217

H
hzcheng 已提交
218
  pReducer->pDesc = pDesc;
219
  tscDebug("%p the number of merged leaves is: %d", pSql, pReducer->numOfBuffer);
H
hzcheng 已提交
220 221 222 223 224 225

  int32_t idx = 0;
  for (int32_t i = 0; i < numOfBuffer; ++i) {
    int32_t numOfFlushoutInFile = pMemBuffer[i]->fileMeta.flushoutData.nLength;

    for (int32_t j = 0; j < numOfFlushoutInFile; ++j) {
H
hjLiao 已提交
226 227 228
      SLocalDataSource *ds = (SLocalDataSource *)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->pageSize);
      if (ds == NULL) {
        tscError("%p failed to create merge structure", pSql);
229
        pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
S
Shengliang Guan 已提交
230
        taosTFree(pReducer);
H
hzcheng 已提交
231 232
        return;
      }
H
hjLiao 已提交
233 234
      
      pReducer->pLocalDataSrc[idx] = ds;
H
hzcheng 已提交
235

H
hjLiao 已提交
236 237
      ds->pMemBuffer = pMemBuffer[i];
      ds->flushoutIdx = j;
238
      ds->filePage.num = 0;
H
hjLiao 已提交
239 240
      ds->pageId = 0;
      ds->rowIdx = 0;
H
hzcheng 已提交
241

242
      tscDebug("%p load data from disk into memory, orderOfVnode:%d, total:%d", pSql, i + 1, idx + 1);
H
hjLiao 已提交
243
      tExtMemBufferLoadData(pMemBuffer[i], &(ds->filePage), j, 0);
H
hzcheng 已提交
244
#ifdef _DEBUG_VIEW
245
      printf("load data page into mem for build loser tree: %" PRIu64 " rows\n", ds->filePage.num);
H
hzcheng 已提交
246
      SSrcColumnInfo colInfo[256] = {0};
H
hjxilinx 已提交
247
      SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
248 249

      tscGetSrcColumnInfo(colInfo, pQueryInfo);
H
hzcheng 已提交
250

251
      tColModelDisplayEx(pDesc->pColumnModel, ds->filePage.data, ds->filePage.num,
H
hjxilinx 已提交
252
                         pMemBuffer[0]->numOfElemsPerPage, colInfo);
H
hzcheng 已提交
253
#endif
H
hjLiao 已提交
254
      
255
      if (ds->filePage.num == 0) {  // no data in this flush, the index does not increase
256
        tscDebug("%p flush data is empty, ignore %d flush record", pSql, idx);
S
Shengliang Guan 已提交
257
        taosTFree(ds);
H
hzcheng 已提交
258 259
        continue;
      }
H
hjLiao 已提交
260
      
H
hzcheng 已提交
261 262 263
      idx += 1;
    }
  }
H
hjLiao 已提交
264 265
  
  // no data actually, no need to merge result.
H
hzcheng 已提交
266
  if (idx == 0) {
S
Shengliang Guan 已提交
267
    taosTFree(pReducer);
H
hzcheng 已提交
268 269 270 271 272 273
    return;
  }

  pReducer->numOfBuffer = idx;

  SCompareParam *param = malloc(sizeof(SCompareParam));
B
Bomin Zhang 已提交
274
  if (param == NULL) {
S
Shengliang Guan 已提交
275
    taosTFree(pReducer);
B
Bomin Zhang 已提交
276 277
    return;
  }
H
Haojun Liao 已提交
278

H
hzcheng 已提交
279 280
  param->pLocalData = pReducer->pLocalDataSrc;
  param->pDesc = pReducer->pDesc;
281
  param->num = pReducer->pLocalDataSrc[0]->pMemBuffer->numOfElemsPerPage;
H
hjxilinx 已提交
282 283
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

284
  param->groupOrderType = pQueryInfo->groupbyExpr.orderType;
H
Haojun Liao 已提交
285
  pReducer->orderPrjOnSTable = tscOrderedProjectionQueryOnSTable(pQueryInfo, 0);
H
hzcheng 已提交
286 287 288

  pRes->code = tLoserTreeCreate(&pReducer->pLoserTree, pReducer->numOfBuffer, param, treeComparator);
  if (pReducer->pLoserTree == NULL || pRes->code != 0) {
S
Shengliang Guan 已提交
289 290
    taosTFree(param);
    taosTFree(pReducer);
H
hzcheng 已提交
291 292 293 294 295
    return;
  }

  // the input data format follows the old format, but output in a new format.
  // so, all the input must be parsed as old format
H
hjLiao 已提交
296
  pReducer->pCtx = (SQLFunctionCtx *)calloc(tscSqlExprNumOfExprs(pQueryInfo), sizeof(SQLFunctionCtx));
H
hzcheng 已提交
297 298
  pReducer->rowSize = pMemBuffer[0]->nElemSize;

H
hjxilinx 已提交
299 300
  tscRestoreSQLFuncForSTableQuery(pQueryInfo);
  tscFieldInfoUpdateOffset(pQueryInfo);
H
hzcheng 已提交
301

H
hjxilinx 已提交
302
  if (pReducer->rowSize > pMemBuffer[0]->pageSize) {
H
hzcheng 已提交
303 304 305 306 307 308 309 310
    assert(false);  // todo fixed row size is larger than the minimum page size;
  }

  pReducer->hasPrevRow = false;
  pReducer->hasUnprocessedRow = false;

  pReducer->prevRowOfInput = (char *)calloc(1, pReducer->rowSize);

S
slguan 已提交
311
  // used to keep the latest input row
H
hzcheng 已提交
312 313 314 315
  pReducer->pTempBuffer = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage));
  pReducer->discardData = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage));
  pReducer->discard = false;

H
hjxilinx 已提交
316
  pReducer->nResultBufSize = pMemBuffer[0]->pageSize * 16;
H
hzcheng 已提交
317
  pReducer->pResultBuf = (tFilePage *)calloc(1, pReducer->nResultBufSize + sizeof(tFilePage));
H
hjxilinx 已提交
318

H
Haojun Liao 已提交
319
  pReducer->finalRowSize = tscGetResRowLength(pQueryInfo->exprList);
H
hzcheng 已提交
320
  pReducer->resColModel = finalmodel;
B
Bomin Zhang 已提交
321
  pReducer->resColModel->capacity = pReducer->nResultBufSize;
322

323
  assert(pReducer->finalRowSize > 0);
B
Bomin Zhang 已提交
324 325 326
  if (pReducer->finalRowSize > 0) {
    pReducer->resColModel->capacity /= pReducer->finalRowSize;
  }
H
Haojun Liao 已提交
327
  assert(pReducer->finalRowSize <= pReducer->rowSize);
H
hzcheng 已提交
328

H
hjxilinx 已提交
329
  pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->capacity);
H
hzcheng 已提交
330

H
hjxilinx 已提交
331
  if (pReducer->pTempBuffer == NULL || pReducer->discardData == NULL || pReducer->pResultBuf == NULL ||
332
      /*pReducer->pBufForInterpo == NULL || */pReducer->pFinalRes == NULL || pReducer->prevRowOfInput == NULL) {
S
Shengliang Guan 已提交
333 334 335 336 337 338 339 340
    taosTFree(pReducer->pTempBuffer);
    taosTFree(pReducer->discardData);
    taosTFree(pReducer->pResultBuf);
    taosTFree(pReducer->pFinalRes);
    taosTFree(pReducer->prevRowOfInput);
    taosTFree(pReducer->pLoserTree);
    taosTFree(param);
    taosTFree(pReducer);
341
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
342 343
    return;
  }
H
hjLiao 已提交
344
  
345 346 347 348
  size_t numOfCols = tscSqlExprNumOfExprs(pQueryInfo);
  
  pReducer->pTempBuffer->num = 0;
  pReducer->pResInfo = calloc(numOfCols, sizeof(SResultInfo));
H
hzcheng 已提交
349

350
  tscCreateResPointerInfo(pRes, pQueryInfo);
H
hjLiao 已提交
351
  tscInitSqlContext(pCmd, pReducer, pDesc);
H
hzcheng 已提交
352

H
hjxilinx 已提交
353 354
  // we change the capacity of schema to denote that there is only one row in temp buffer
  pReducer->pDesc->pColumnModel->capacity = 1;
H
hjxilinx 已提交
355 356

  // restore the limitation value at the last stage
357 358 359 360
  if (tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) {
    pQueryInfo->limit.limit = pQueryInfo->clauseLimit;
    pQueryInfo->limit.offset = pQueryInfo->prjOffset;
  }
H
hjxilinx 已提交
361

S
TD-1057  
Shengliang Guan 已提交
362
  pReducer->offset = (int32_t)pQueryInfo->limit.offset;
H
hjxilinx 已提交
363

H
hzcheng 已提交
364 365 366
  pRes->pLocalReducer = pReducer;
  pRes->numOfGroups = 0;

367
  STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0);
368
  STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);
H
hjxilinx 已提交
369
  
370
  TSKEY stime = (pQueryInfo->order.order == TSDB_ORDER_ASC)? pQueryInfo->window.skey : pQueryInfo->window.ekey;
371
  int64_t revisedSTime = taosTimeTruncate(stime, &pQueryInfo->interval, tinfo.precision);
372 373 374 375
  
  if (pQueryInfo->fillType != TSDB_FILL_NONE) {
    SFillColInfo* pFillCol = createFillColInfo(pQueryInfo);
    pReducer->pFillInfo = taosInitFillInfo(pQueryInfo->order.order, revisedSTime, pQueryInfo->groupbyExpr.numOfGroupCols,
376
                                           4096, (int32_t)numOfCols, pQueryInfo->interval.sliding, pQueryInfo->interval.slidingUnit,
H
Haojun Liao 已提交
377
                                           tinfo.precision, pQueryInfo->fillType, pFillCol);
378
  }
H
hzcheng 已提交
379 380 381 382
}

/*
 * Sort the rows held in pPage (only when the descriptor has order columns) and append them
 * to pMemoryBuf with tExtMemBufferPut(). On success the page is reset to zero rows.
 *
 * Returns 0 on success or when the page is empty, -1 when the rows could not be stored.
 */
static int32_t tscFlushTmpBufferImpl(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage,
                                     int32_t orderType) {
  // nothing buffered, nothing to do
  if (pPage->num == 0) {
    return 0;
  }

  assert(pPage->num <= pDesc->pColumnModel->capacity);

  // the rows must be sorted before they leave the page; they are stored consecutively in tFilePage
  bool hasOrderCols = (pDesc->orderInfo.numOfCols > 0);
  if (hasOrderCols) {
    tColDataQSort(pDesc, (int32_t)pPage->num, 0, (int32_t)pPage->num - 1, pPage->data, orderType);
  }

#ifdef _DEBUG_VIEW
  printf("%" PRIu64 " rows data flushed to disk after been sorted:\n", pPage->num);
  tColModelDisplay(pDesc->pColumnModel, pPage->data, pPage->num, pPage->num);
#endif

  // hand the sorted rows over to the external buffer
  int32_t stored = tExtMemBufferPut(pMemoryBuf, pPage->data, (int32_t)pPage->num);
  if (stored < 0) {
    tscError("failed to save data in temporary buffer");
    return -1;
  }

  pPage->num = 0;
  return 0;
}

/*
 * Move the rows of pPage into pMemoryBuf (see tscFlushTmpBufferImpl) and then flush the
 * external buffer with tExtMemBufferFlush().
 *
 * Returns 0 when both steps succeed, otherwise the non-zero result of the step that failed;
 * the flush is not attempted when the first step fails.
 */
int32_t tscFlushTmpBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, int32_t orderType) {
  int32_t code = tscFlushTmpBufferImpl(pMemoryBuf, pDesc, pPage, orderType);
  if (code == 0) {
    code = tExtMemBufferFlush(pMemoryBuf);
  }

  return code;
}

/*
 * Append numOfRows rows from data to pPage, flushing the page through tscFlushTmpBuffer()
 * every time it reaches the capacity of the column model.
 *
 * Returns 0 on success, otherwise the code reported by tscFlushTmpBuffer().
 */
int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, void *data,
                     int32_t numOfRows, int32_t orderType) {
  SColumnModel *pModel = pDesc->pColumnModel;

  // fast path: the page can take all rows
  if (pPage->num + numOfRows <= pModel->capacity) {
    tColModelAppend(pModel, pPage, data, 0, numOfRows, numOfRows);
    return 0;
  }

  // top the page up to its capacity, then flush it
  int32_t numOfFilled = pModel->capacity - (int32_t)pPage->num;
  tColModelAppend(pModel, pPage, data, 0, numOfFilled, numOfRows);

  assert(pPage->num == pModel->capacity);
  int32_t code = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType);
  if (code != 0) {
    return code;
  }

  // copy the rest in chunks of at most one page
  for (int32_t remain = numOfRows - numOfFilled; remain > 0;) {
    int32_t chunk = (remain > pModel->capacity) ? pModel->capacity : remain;

    tColModelAppend(pModel, pPage, data, numOfRows - remain, chunk, numOfRows);

    if (pPage->num == pModel->capacity) {
      code = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    } else {
      pPage->num = chunk;
    }

    remain -= chunk;
  }

  return 0;
}

/*
 * Detach the local reducer from pSql->res and release it together with everything it owns:
 * fill info, function contexts, result infos, working buffers, the loser tree with its
 * comparator parameter, the merge environment and the loaded data sources.
 *
 * The reducer pointer is taken with an atomic exchange, so only one caller releases it.
 * While the reducer status is TSC_LOCALREDUCE_IN_PROGRESS the function sleeps 100 ms and
 * retries before it frees anything.
 *
 * pCtx and pResInfo are walked with tscSqlExprNumOfExprs(), the same count that was used to
 * allocate them in tscCreateLocalReducer().
 */
void tscDestroyLocalReducer(SSqlObj *pSql) {
  if (pSql == NULL) {
    return;
  }

  SSqlRes *pRes = &(pSql->res);
  if (pRes->pLocalReducer == NULL) {
    return;
  }

  SSqlCmd *   pCmd = &pSql->cmd;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // there is no more result, so we release all allocated resource
  SLocalReducer *pLocalReducer = (SLocalReducer *)atomic_exchange_ptr(&pRes->pLocalReducer, NULL);
  if (pLocalReducer != NULL) {
    int32_t status = 0;
    while ((status = atomic_val_compare_exchange_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY,
                                                    TSC_LOCALREDUCE_TOBE_FREED)) == TSC_LOCALREDUCE_IN_PROGRESS) {
      taosMsleep(100);
      tscDebug("%p waiting for delete procedure, status: %d", pSql, status);
    }

    pLocalReducer->pFillInfo = taosDestoryFillInfo(pLocalReducer->pFillInfo);

    int32_t numOfExprs = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);

    if (pLocalReducer->pCtx != NULL) {
      for (int32_t i = 0; i < numOfExprs; ++i) {
        SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[i];

        tVariantDestroy(&pCtx->tag);
        if (pCtx->tagInfo.pTagCtxList != NULL) {
          taosTFree(pCtx->tagInfo.pTagCtxList);
        }
      }

      taosTFree(pLocalReducer->pCtx);
    }

    taosTFree(pLocalReducer->prevRowOfInput);

    taosTFree(pLocalReducer->pTempBuffer);
    taosTFree(pLocalReducer->pResultBuf);

    if (pLocalReducer->pResInfo != NULL) {
      for (int32_t i = 0; i < numOfExprs; ++i) {
        taosTFree(pLocalReducer->pResInfo[i].interResultBuf);
      }

      taosTFree(pLocalReducer->pResInfo);
    }

    if (pLocalReducer->pLoserTree) {
      // the comparator parameter was allocated by tscCreateLocalReducer() and lives in the tree
      taosTFree(pLocalReducer->pLoserTree->param);
      taosTFree(pLocalReducer->pLoserTree);
    }

    taosTFree(pLocalReducer->pFinalRes);
    taosTFree(pLocalReducer->discardData);

    tscLocalReducerEnvDestroy(pLocalReducer->pExtMemBuffer, pLocalReducer->pDesc, pLocalReducer->resColModel,
                              pLocalReducer->numOfVnode);
    for (int32_t i = 0; i < pLocalReducer->numOfBuffer; ++i) {
      taosTFree(pLocalReducer->pLocalDataSrc[i]);
    }

    pLocalReducer->numOfBuffer = 0;
    pLocalReducer->numOfCompleted = 0;
    free(pLocalReducer);
  } else {
    tscDebug("%p already freed or another free function is invoked", pSql);
  }

  tscDebug("%p free local reducer finished", pSql);
}

H
hjxilinx 已提交
544
/*
 * Create the order descriptor used to sort and merge the intermediate results.
 *
 * The order columns are the group-by columns (the last numOfGroupCols output columns), plus
 * one slot for the primary timestamp when the query has an interval or is an ordered
 * projection on a super table. Without group-by columns, the single order column is the
 * expression that projects the primary timestamp.
 *
 * @param pOrderDesc receives the new descriptor (NULL on failure)
 * @param pCmd       command whose current clause is inspected
 * @param pModel     column model handed to tOrderDesCreate()
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_TSC_OUT_OF_MEMORY when an allocation fails
 */
static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCmd, SColumnModel *pModel) {
  int32_t     numOfGroupByCols = 0;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) {
    numOfGroupByCols = pQueryInfo->groupbyExpr.numOfGroupCols;
  }

  // primary timestamp column is involved in final result
  if (pQueryInfo->interval.interval != 0 || tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) {
    numOfGroupByCols++;
  }

  // calloc(0, ..) is allowed to return NULL; always ask for at least one slot so that a NULL
  // result can only mean that the allocation failed
  size_t   numOfSlots = (numOfGroupByCols > 0) ? (size_t)numOfGroupByCols : 1;
  int32_t *orderColIndexList = (int32_t *)calloc(numOfSlots, sizeof(int32_t));
  if (orderColIndexList == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

  if (numOfGroupByCols > 0) {
    if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) {
      int32_t startCols = pQueryInfo->fieldsInfo.numOfOutput - pQueryInfo->groupbyExpr.numOfGroupCols;

      // the last "pQueryInfo->groupbyExpr.numOfGroupCols" columns are order-by columns
      for (int32_t i = 0; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) {
        orderColIndexList[i] = startCols++;
      }

      if (pQueryInfo->interval.interval != 0) {
        // the first column is the timestamp, handles queries like "interval(10m) group by tags"
        orderColIndexList[numOfGroupByCols - 1] = PRIMARYKEY_TIMESTAMP_COL_INDEX; //TODO ???
      }
    } else { // it is the orderby ts asc/desc projection query for super table
      int32_t numOfExprs = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);
      for (int32_t i = 0; i < numOfExprs; ++i) {
        SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i);
        if (pExpr->functionId == TSDB_FUNC_PRJ && pExpr->colInfo.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
          orderColIndexList[0] = i;
        }
      }

      assert(pQueryInfo->order.orderColId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    }
  }

  *pOrderDesc = tOrderDesCreate(orderColIndexList, numOfGroupByCols, pModel, pQueryInfo->order.order);
  taosTFree(orderColIndexList);

  if (*pOrderDesc == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  } else {
    return TSDB_CODE_SUCCESS;
  }
}

S
slguan 已提交
599
/*
 * Decide whether the row in tmpBuffer belongs to the same group as the previous row pPrev.
 *
 * - ordered projection on a super table: always true
 * - plain projection (first function is TSDB_FUNC_PRJ): always false
 * - no order columns, or the primary timestamp as the only order column: always true
 * - otherwise: true when the bytes from the first order column up to the end of the row
 *   (as laid out by the column model of the order descriptor) are identical in both rows
 */
bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage *tmpBuffer) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // the merge procedure is disabled for column projection queries
  int16_t firstFuncId = pReducer->pCtx[0].functionId;
  assert(firstFuncId != TSDB_FUNC_ARITHM);

  if (pReducer->orderPrjOnSTable) {
    return true;
  }

  if (firstFuncId == TSDB_FUNC_PRJ || firstFuncId == TSDB_FUNC_ARITHM) {
    return false;
  }

  tOrderDescriptor *pOrderDesc = pReducer->pDesc;
  SColumnOrderInfo *pOrder = &pOrderDesc->orderInfo;

  // without group by columns all data belongs to one group
  int32_t numOfOrderCols = pOrder->numOfCols;
  if (numOfOrderCols <= 0) {
    return true;
  }

  bool tsIsLastOrderCol = (pOrder->colIndex[numOfOrderCols - 1] == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  if (!tsIsLastOrderCol) {
    // simple group by query
    assert(pQueryInfo->interval.interval == 0);
  } else {
    // super table interval query: when the primary timestamp is the only order column,
    // all result data belongs to one group
    assert(pQueryInfo->interval.interval > 0);
    if (numOfOrderCols == 1) {
      return true;
    }
  }

  // compare everything from the first order column to the end of the row
  SColumnModel *pModel = pOrderDesc->pColumnModel;
  int32_t       firstOrderCol = pOrder->colIndex[0];
  int32_t       offset = pModel->pFields[firstOrderCol].offset;

  return memcmp(pPrev + offset, tmpBuffer->data + offset, pModel->rowSize - offset) == 0;
}

int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOrderDescriptor **pOrderDesc,
H
hjxilinx 已提交
644
                                 SColumnModel **pFinalModel, uint32_t nBufferSizes) {
H
hzcheng 已提交
645 646 647
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

H
hjxilinx 已提交
648
  SSchema *     pSchema = NULL;
H
hjxilinx 已提交
649
  SColumnModel *pModel = NULL;
H
hzcheng 已提交
650 651
  *pFinalModel = NULL;

H
hjxilinx 已提交
652
  SQueryInfo *    pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
H
hjxilinx 已提交
653
  STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
S
slguan 已提交
654

H
Haojun Liao 已提交
655
  (*pMemBuffer) = (tExtMemBuffer **)malloc(POINTER_BYTES * pSql->subState.numOfSub);
H
hzcheng 已提交
656 657
  if (*pMemBuffer == NULL) {
    tscError("%p failed to allocate memory", pSql);
658
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
659 660
    return pRes->code;
  }
H
hjxilinx 已提交
661 662 663 664
  
  size_t size = tscSqlExprNumOfExprs(pQueryInfo);
  
  pSchema = (SSchema *)calloc(1, sizeof(SSchema) * size);
H
hzcheng 已提交
665 666
  if (pSchema == NULL) {
    tscError("%p failed to allocate memory", pSql);
667
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
668 669 670 671
    return pRes->code;
  }

  int32_t rlen = 0;
H
hjxilinx 已提交
672
  for (int32_t i = 0; i < size; ++i) {
673
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
H
hzcheng 已提交
674 675

    pSchema[i].bytes = pExpr->resBytes;
S
TD-1057  
Shengliang Guan 已提交
676
    pSchema[i].type = (int8_t)pExpr->resType;
H
hzcheng 已提交
677 678 679
    rlen += pExpr->resBytes;
  }

L
lihui 已提交
680
  int32_t capacity = 0;
H
hjxilinx 已提交
681 682 683
  if (rlen != 0) {
    capacity = nBufferSizes / rlen;
  }
H
hjxilinx 已提交
684
  
S
TD-1057  
Shengliang Guan 已提交
685
  pModel = createColumnModel(pSchema, (int32_t)size, capacity);
H
hzcheng 已提交
686

H
Haojun Liao 已提交
687 688 689 690 691 692
  int32_t pg = DEFAULT_PAGE_SIZE;
  int32_t overhead = sizeof(tFilePage);
  while((pg - overhead) < pModel->rowSize * 2) {
    pg *= 2;
  }

H
hjxilinx 已提交
693
  size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups;
694
  for (int32_t i = 0; i < numOfSubs; ++i) {
H
Haojun Liao 已提交
695
    (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pg, pModel);
696 697
    (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL;
  }
H
hzcheng 已提交
698 699

  if (createOrderDescriptor(pOrderDesc, pCmd, pModel) != TSDB_CODE_SUCCESS) {
700
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
S
Shengliang Guan 已提交
701
    taosTFree(pSchema);
H
hzcheng 已提交
702 703 704
    return pRes->code;
  }

H
hjxilinx 已提交
705
  // final result depends on the fields number
H
hjxilinx 已提交
706 707
  memset(pSchema, 0, sizeof(SSchema) * size);
  for (int32_t i = 0; i < size; ++i) {
H
hjxilinx 已提交
708 709
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);

710
    SSchema *p1 = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pExpr->colInfo.colIndex);
H
hjxilinx 已提交
711

712
    int32_t inter = 0;
H
hjxilinx 已提交
713 714
    int16_t type = -1;
    int16_t bytes = 0;
H
hjxilinx 已提交
715 716 717 718 719 720 721 722 723

    //    if ((pExpr->functionId >= TSDB_FUNC_FIRST_DST && pExpr->functionId <= TSDB_FUNC_LAST_DST) ||
    //        (pExpr->functionId >= TSDB_FUNC_SUM && pExpr->functionId <= TSDB_FUNC_MAX) ||
    //        pExpr->functionId == TSDB_FUNC_LAST_ROW) {
    // the final result size and type in the same as query on single table.
    // so here, set the flag to be false;

    int32_t functionId = pExpr->functionId;
    if (functionId >= TSDB_FUNC_TS && functionId <= TSDB_FUNC_DIFF) {
H
hjxilinx 已提交
724 725
      type = pModel->pFields[i].field.type;
      bytes = pModel->pFields[i].field.bytes;
H
hjxilinx 已提交
726 727 728 729 730 731 732 733
    } else {
      if (functionId == TSDB_FUNC_FIRST_DST) {
        functionId = TSDB_FUNC_FIRST;
      } else if (functionId == TSDB_FUNC_LAST_DST) {
        functionId = TSDB_FUNC_LAST;
      }

      getResultDataInfo(p1->type, p1->bytes, functionId, 0, &type, &bytes, &inter, 0, false);
H
hjxilinx 已提交
734
    }
H
hzcheng 已提交
735

S
TD-1057  
Shengliang Guan 已提交
736
    pSchema[i].type = (uint8_t)type;
H
hjxilinx 已提交
737 738
    pSchema[i].bytes = bytes;
    strcpy(pSchema[i].name, pModel->pFields[i].field.name);
H
hzcheng 已提交
739
  }
H
hjxilinx 已提交
740
  
S
TD-1057  
Shengliang Guan 已提交
741
  *pFinalModel = createColumnModel(pSchema, (int32_t)size, capacity);
S
Shengliang Guan 已提交
742
  taosTFree(pSchema);
H
hzcheng 已提交
743 744 745 746 747 748 749 750 751 752

  return TSDB_CODE_SUCCESS;
}

/**
 * @param pMemBuffer
 * @param pDesc
 * @param pFinalModel
 * @param numOfVnodes
 */
H
hjxilinx 已提交
753
void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, SColumnModel *pFinalModel,
H
hzcheng 已提交
754
                               int32_t numOfVnodes) {
H
hjxilinx 已提交
755
  destroyColumnModel(pFinalModel);
H
hzcheng 已提交
756
  tOrderDescDestroy(pDesc);
H
Haojun Liao 已提交
757

H
hzcheng 已提交
758
  for (int32_t i = 0; i < numOfVnodes; ++i) {
H
hjxilinx 已提交
759
    pMemBuffer[i] = destoryExtMemBuffer(pMemBuffer[i]);
H
hzcheng 已提交
760 761
  }

S
Shengliang Guan 已提交
762
  taosTFree(pMemBuffer);
H
hzcheng 已提交
763 764 765 766 767 768 769 770 771
}

/**
 *
 * @param pLocalReducer
 * @param pOneInterDataSrc
 * @param treeList
 * @return the number of remain input source. if ret == 0, all data has been handled
 */
S
slguan 已提交
772
int32_t loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc,
H
hzcheng 已提交
773 774 775 776
                               bool *needAdjustLoserTree) {
  pOneInterDataSrc->rowIdx = 0;
  pOneInterDataSrc->pageId += 1;

S
TD-1057  
Shengliang Guan 已提交
777
  if ((uint32_t)pOneInterDataSrc->pageId <
H
hzcheng 已提交
778 779 780 781 782 783
      pOneInterDataSrc->pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[pOneInterDataSrc->flushoutIdx].numOfPages) {
    tExtMemBufferLoadData(pOneInterDataSrc->pMemBuffer, &(pOneInterDataSrc->filePage), pOneInterDataSrc->flushoutIdx,
                          pOneInterDataSrc->pageId);

#if defined(_DEBUG_VIEW)
    printf("new page load to buffer\n");
H
hjxilinx 已提交
784
    tColModelDisplay(pOneInterDataSrc->pMemBuffer->pColumnModel, pOneInterDataSrc->filePage.data,
785
                     pOneInterDataSrc->filePage.num, pOneInterDataSrc->pMemBuffer->pColumnModel->capacity);
H
hzcheng 已提交
786 787 788 789 790 791 792 793 794 795 796 797 798
#endif
    *needAdjustLoserTree = true;
  } else {
    pLocalReducer->numOfCompleted += 1;

    pOneInterDataSrc->rowIdx = -1;
    pOneInterDataSrc->pageId = -1;
    *needAdjustLoserTree = true;
  }

  return pLocalReducer->numOfBuffer;
}

S
slguan 已提交
799 800
/**
 * Bring the loser tree up to date after the row cursor of an input source has moved.
 *
 * If the cursor has run past the rows of the page currently held in memory, the next page of the
 * source is requested first. The leaf that belongs to the current winner (pTree->pNode[0]) is
 * then re-adjusted, unless the load step cleared the adjust flag.
 *
 * @param pLocalReducer     local reducer, supplies the number of input buffers
 * @param pOneInterDataSrc  input source whose cursor has moved
 * @param pTree             loser tree to adjust
 */
void adjustLoserTreeFromNewData(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc,
                                SLoserTreeInfo *pTree) {
  bool adjust = true;

  // the last record of the in-memory page has been chosen as the winner: load the following page
  if (pOneInterDataSrc->rowIdx >= pOneInterDataSrc->filePage.num) {
    loadNewDataFromDiskFor(pLocalReducer, pOneInterDataSrc, &adjust);
  }

  if (!adjust) {
    return;
  }

  // leaves are addressed by the source index shifted by the number of input buffers
  int32_t leafNodeIdx = pTree->pNode[0].index + pLocalReducer->numOfBuffer;

#ifdef _DEBUG_VIEW
  printf("before adjust:\t");
  tLoserTreeDisplay(pTree);
#endif

  tLoserTreeAdjust(pTree, leafNodeIdx);

#ifdef _DEBUG_VIEW
  printf("\nafter adjust:\t");
  tLoserTreeDisplay(pTree);
  printf("\n");
#endif
}

832
/**
 * Switch the reducer into "discard" mode and remember the previous input row, so that the
 * remaining rows of the same group can be recognised and dropped. When a fill descriptor is
 * supplied, it is reset to the truncated start of the query time window.
 *
 * @param pLocalReducer  local reducer to update
 * @param pQueryInfo     query information, supplies the time window and the interval
 * @param pFillInfo      fill descriptor to reset; may be NULL
 */
void savePrevRecordAndSetupFillInfo(SLocalReducer *pLocalReducer, SQueryInfo *pQueryInfo, SFillInfo *pFillInfo) {
  STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
  STableComInfo   tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);

  if (pFillInfo != NULL) {
    // the smaller one of the two window boundaries is the start time
    int64_t stime = pQueryInfo->window.ekey;
    if (pQueryInfo->window.skey < pQueryInfo->window.ekey) {
      stime = pQueryInfo->window.skey;
    }

    int64_t revisedSTime = taosTimeTruncate(stime, &pQueryInfo->interval, tinfo.precision);
    taosResetFillInfo(pFillInfo, revisedSTime);
  }

  pLocalReducer->discard = true;
  pLocalReducer->discardData->num = 0;

  // keep a copy of the previous input row as the reference for the discard comparison
  SColumnModel *pInputModel = pLocalReducer->pDesc->pColumnModel;
  tColModelAppend(pInputModel, pLocalReducer->discardData, pLocalReducer->prevRowOfInput, 0, 1, 1);
}

852
/*
 * Produce the final rows of the current result page for queries without a fill operation.
 *
 * The limit offset is applied first, then the limit itself; what remains is copied from the
 * result buffer into pLocalReducer->pFinalRes and the result buffer is emptied. When the limit
 * cuts the output, the reducer is switched to discard the rest of the current group.
 */
static void genFinalResWithoutFill(SSqlRes* pRes, SLocalReducer *pLocalReducer, SQueryInfo* pQueryInfo) {
  assert(pQueryInfo->interval.interval == 0 || pQueryInfo->fillType == TSDB_FILL_NONE);

  tFilePage    *pResBuf = pLocalReducer->pResultBuf;
  SColumnModel *pResModel = pLocalReducer->resColModel;

  pRes->data = pLocalReducer->pFinalRes;
  pRes->numOfRows = pResBuf->num;

  if (pQueryInfo->limit.offset > 0) {
    if (pQueryInfo->limit.offset >= pRes->numOfRows) {
      // all rows of this page are skipped by the offset
      pQueryInfo->limit.offset -= pRes->numOfRows;
      pRes->numOfRows = 0;
    } else {
      int32_t numOfBuffered = (int32_t)pResBuf->num;
      tColModelErase(pResModel, pResBuf, numOfBuffered, 0, (int32_t)pQueryInfo->limit.offset - 1);

      /* remove the hole in column model */
      tColModelCompact(pResModel, pResBuf, numOfBuffered);

      pRes->numOfRows -= pQueryInfo->limit.offset;
      pQueryInfo->limit.offset = 0;
    }
  }

  pRes->numOfRowsGroup += pRes->numOfRows;

  // impose the limitation of output rows on the final result
  bool exceedLimit = (pQueryInfo->limit.limit >= 0) && (pRes->numOfRowsGroup > pQueryInfo->limit.limit);
  if (exceedLimit) {
    int32_t numOfBuffered = (int32_t)pResBuf->num;
    int32_t overflow = (int32_t)(pRes->numOfRowsGroup - pQueryInfo->limit.limit);
    assert(overflow < pRes->numOfRows);

    pRes->numOfRowsGroup = pQueryInfo->limit.limit;
    pRes->numOfRows -= overflow;
    pResBuf->num -= overflow;

    tColModelCompact(pResModel, pResBuf, numOfBuffered);

    // set remain data to be discarded, and reset the interpolation information
    savePrevRecordAndSetupFillInfo(pLocalReducer, pQueryInfo, pLocalReducer->pFillInfo);
  }

  memcpy(pRes->data, pResBuf->data, (size_t)(pRes->numOfRows * pLocalReducer->finalRowSize));

  pRes->numOfClauseTotal += pRes->numOfRows;
  pResBuf->num = 0;
}

/*
 * Note: pRes->pLocalReducer may be null, due to the fact that "tscDestroyLocalReducer" is called
 * by "interuptHandler" function in shell
 */
static void doFillResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool doneOutput) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;
  
  tFilePage  *pBeforeFillData = pLocalReducer->pResultBuf;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  SFillInfo  *pFillInfo = pLocalReducer->pFillInfo;
H
hzcheng 已提交
911

912 913
  // todo extract function
  int64_t actualETime = (pQueryInfo->order.order == TSDB_ORDER_ASC)? pQueryInfo->window.ekey: pQueryInfo->window.skey;
H
hzcheng 已提交
914

H
hjxilinx 已提交
915 916 917
  tFilePage **pResPages = malloc(POINTER_BYTES * pQueryInfo->fieldsInfo.numOfOutput);
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
    TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
918
    pResPages[i] = calloc(1, sizeof(tFilePage) + pField->bytes * pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
919
  }
H
Haojun Liao 已提交
920

H
hzcheng 已提交
921
  while (1) {
922
    int64_t newRows = taosGenerateDataBlock(pFillInfo, pResPages, pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
923

924 925
    if (pQueryInfo->limit.offset < newRows) {
      newRows -= pQueryInfo->limit.offset;
H
hzcheng 已提交
926

927
      if (pQueryInfo->limit.offset > 0) {
H
hjxilinx 已提交
928 929
        for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
          TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
H
hjxilinx 已提交
930
          memmove(pResPages[i]->data, pResPages[i]->data + pField->bytes * pQueryInfo->limit.offset,
931
                  (size_t)(newRows * pField->bytes));
H
hzcheng 已提交
932 933 934 935 936 937
        }
      }

      pRes->data = pLocalReducer->pFinalRes;
      pRes->numOfRows = newRows;

938
      pQueryInfo->limit.offset = 0;
H
hzcheng 已提交
939 940
      break;
    } else {
941
      pQueryInfo->limit.offset -= newRows;
H
hzcheng 已提交
942 943
      pRes->numOfRows = 0;

944
      int32_t rpoints = taosNumOfRemainRows(pFillInfo);
H
hzcheng 已提交
945
      if (rpoints <= 0) {
946
        if (!doneOutput) { // reduce procedure has not completed yet, but current results for fill are exhausted
H
hzcheng 已提交
947 948 949
          break;
        }

950
        // all output in current group are completed
S
TD-1057  
Shengliang Guan 已提交
951
        int32_t totalRemainRows = (int32_t)getFilledNumOfRes(pFillInfo, actualETime, pLocalReducer->resColModel->capacity);
H
hzcheng 已提交
952 953 954 955 956 957 958 959
        if (totalRemainRows <= 0) {
          break;
        }
      }
    }
  }

  if (pRes->numOfRows > 0) {
960
    int32_t currentTotal = (int32_t)(pRes->numOfRowsGroup + pRes->numOfRows);
H
hzcheng 已提交
961

962 963 964 965 966
    if (pQueryInfo->limit.limit >= 0 && currentTotal > pQueryInfo->limit.limit) {
      int32_t overflow = (int32_t)(currentTotal - pQueryInfo->limit.limit);

      pRes->numOfRows -= overflow;
      assert(pRes->numOfRows >= 0);
H
Haojun Liao 已提交
967

H
hzcheng 已提交
968
      /* set remain data to be discarded, and reset the interpolation information */
969
      savePrevRecordAndSetupFillInfo(pLocalReducer, pQueryInfo, pFillInfo);
H
hzcheng 已提交
970 971
    }

972 973 974
    for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
      TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
      int16_t     offset = getColumnModelOffset(pLocalReducer->resColModel, i);
975
      memcpy(pRes->data + offset * pRes->numOfRows, pResPages[i]->data, (size_t)(pField->bytes * pRes->numOfRows));
H
hzcheng 已提交
976
    }
H
Haojun Liao 已提交
977 978 979

    pRes->numOfRowsGroup += pRes->numOfRows;
    pRes->numOfClauseTotal += pRes->numOfRows;
H
hzcheng 已提交
980 981
  }

982
  pBeforeFillData->num = 0;
H
hjxilinx 已提交
983
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
S
Shengliang Guan 已提交
984
    taosTFree(pResPages[i]);
H
hzcheng 已提交
985
  }
986
  
S
Shengliang Guan 已提交
987
  taosTFree(pResPages);
H
hzcheng 已提交
988 989
}

S
slguan 已提交
990
/*
 * Copy the single row held in tmpBuffer into pLocalReducer->prevRowOfInput, column by column,
 * then empty tmpBuffer and record that a previous row is available.
 */
static void savePreviousRow(SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  SColumnModel *pInputModel = pLocalReducer->pDesc->pColumnModel;
  assert(pInputModel->capacity == 1 && tmpBuffer->num == 1);

  int32_t col = 0;
  while (col < pInputModel->numOfCols) {
    SSchema *pColSchema = getColumnModelSchema(pInputModel, col);
    int16_t  colOffset = getColumnModelOffset(pInputModel, col);

    // source and destination share the same column layout
    memcpy(pLocalReducer->prevRowOfInput + colOffset, tmpBuffer->data + colOffset, pColSchema->bytes);
    ++col;
  }

  tmpBuffer->num = 0;
  pLocalReducer->hasPrevRow = true;
}

H
hjxilinx 已提交
1006
/*
 * Run the secondary merge step of every output expression on the current input row.
 *
 * First pass: prepare each function context (tag value for the tag/ts dummy functions, first
 * parameter for top/bottom), mark it as being in the secondary merge stage and, when needInit is
 * set, call the init routine of the function.
 * Second pass: invoke distSecondaryMergeFunc for every function except the two dummy ones.
 */
static void doExecuteSecondaryMerge(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, bool needInit) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t      numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  // the tag columns need to be set before all functions execution
  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[idx];
    int32_t         functionId = pCtx->functionId;

    switch (functionId) {
      case TSDB_FUNC_TAG_DUMMY:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TS_DUMMY: {
        // tags/tags_dummy function, the tag field of SQLFunctionCtx is from the input buffer
        tVariantDestroy(&pCtx->tag);
        char* input = pCtx->aInputElemBuf;

        if (pCtx->inputType == TSDB_DATA_TYPE_BINARY || pCtx->inputType == TSDB_DATA_TYPE_NCHAR) {
          assert(varDataLen(input) <= pCtx->inputBytes);
          tVariantCreateFromBinary(&pCtx->tag, varDataVal(input), varDataLen(input), pCtx->inputType);
        } else {
          tVariantCreateFromBinary(&pCtx->tag, input, pCtx->inputBytes, pCtx->inputType);
        }
        break;
      }

      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM: {
        SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, idx);
        pCtx->param[0].i64Key = pExpr->param[0].i64Key;
        break;
      }

      default:
        break;
    }

    pCtx->currentStage = SECONDARY_STAGE_MERGE;

    if (needInit) {
      aAggs[pCtx->functionId].init(pCtx);
    }
  }

  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    int32_t functionId = pLocalReducer->pCtx[idx].functionId;

    bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
    if (!isDummy) {
      aAggs[functionId].distSecondaryMergeFunc(&pLocalReducer->pCtx[idx]);
    }
  }
}
H
hzcheng 已提交
1047

H
hjxilinx 已提交
1048
/*
 * If a row is still flagged as unprocessed, clear the flag, run the secondary merge on it with
 * function initialisation, and store it as the previous row.
 */
static void handleUnprocessedRow(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  if (!pLocalReducer->hasUnprocessedRow) {
    return;
  }

  pLocalReducer->hasUnprocessedRow = false;

  doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
  savePreviousRow(pLocalReducer, tmpBuffer);
}

1056
/*
 * Return the largest numOfRes reported by the function contexts of the query.
 *
 * ts, tag and tagprj functions are ignored: they can not decide the output number of the current
 * query, which is determined by the main output functions. The result is 0 when no other
 * function is present.
 */
static int64_t getNumOfResultLocal(SQueryInfo *pQueryInfo, SQLFunctionCtx *pCtx) {
  int64_t numOfRows = 0;
  size_t  numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    int32_t functionId = pCtx[idx].functionId;

    bool countable = !(functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
    if (countable) {
      SResultInfo* pResInfo = GET_RES_INFO(&pCtx[idx]);
      if (numOfRows < pResInfo->numOfRes) {
        numOfRows = pResInfo->numOfRes;
      }
    }
  }

  return numOfRows;
}

/*
S
slguan 已提交
1080
 * in handling the top/bottom query, which produce more than one rows result,
H
hzcheng 已提交
1081 1082
 * the tsdb_func_tags only fill the first row of results, the remain rows need to
 * filled with the same result, which is the tags, specified in group by clause
S
slguan 已提交
1083
 *
H
hzcheng 已提交
1084
 */
H
hjxilinx 已提交
1085
static void fillMultiRowsOfTagsVal(SQueryInfo *pQueryInfo, int32_t numOfRes, SLocalReducer *pLocalReducer) {
S
slguan 已提交
1086
  int32_t maxBufSize = 0;  // find the max tags column length to prepare the buffer
H
hjxilinx 已提交
1087 1088 1089
  size_t size = tscSqlExprNumOfExprs(pQueryInfo);
  
  for (int32_t k = 0; k < size; ++k) {
1090
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, k);
S
slguan 已提交
1091
    if (maxBufSize < pExpr->resBytes && pExpr->functionId == TSDB_FUNC_TAG) {
H
hzcheng 已提交
1092 1093 1094 1095 1096 1097
      maxBufSize = pExpr->resBytes;
    }
  }

  assert(maxBufSize >= 0);

H
hjxilinx 已提交
1098
  char *buf = malloc((size_t)maxBufSize);
H
hjxilinx 已提交
1099
  for (int32_t k = 0; k < size; ++k) {
H
Haojun Liao 已提交
1100 1101
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[k];
    if (pCtx->functionId != TSDB_FUNC_TAG) {
S
slguan 已提交
1102 1103 1104
      continue;
    }

H
hzcheng 已提交
1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117
    int32_t inc = numOfRes - 1;  // tsdb_func_tag function only produce one row of result
    memset(buf, 0, (size_t)maxBufSize);
    memcpy(buf, pCtx->aOutputBuf, (size_t)pCtx->outputBytes);

    for (int32_t i = 0; i < inc; ++i) {
      pCtx->aOutputBuf += pCtx->outputBytes;
      memcpy(pCtx->aOutputBuf, buf, (size_t)pCtx->outputBytes);
    }
  }

  free(buf);
}

H
hjxilinx 已提交
1118
/*
 * Finalize all functions of the current group: call xFinalize on every function context, clear
 * the "previous row" flag, add the number of generated rows to the result buffer and replicate
 * the tag values over all generated rows.
 *
 * Returns the number of rows generated for the group.
 */
int32_t finalizeRes(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer) {
  size_t numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    SQLFunctionCtx* pFuncCtx = &pLocalReducer->pCtx[idx];
    aAggs[pFuncCtx->functionId].xFinalize(pFuncCtx);
  }

  pLocalReducer->hasPrevRow = false;

  int32_t numOfGenerated = (int32_t)getNumOfResultLocal(pQueryInfo, pLocalReducer->pCtx);
  pLocalReducer->pResultBuf->num += numOfGenerated;

  fillMultiRowsOfTagsVal(pQueryInfo, numOfGenerated, pLocalReducer);
  return numOfGenerated;
}

/*
 * points merge:
 * points are merged according to the sort info, which is tags columns and timestamp column.
 * In case of points without either tags columns or timestamp, such as
 * results generated by simple aggregation function, we merge them all into one points
 * *Exception*: column projection query, required no merge procedure
 */
H
hjxilinx 已提交
1142
bool needToMerge(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
H
hzcheng 已提交
1143
  int32_t ret = 0;  // merge all result by default
1144

H
Haojun Liao 已提交
1145
  int16_t functionId = pLocalReducer->pCtx[0].functionId;
1146 1147 1148

  // todo opt performance
  if ((/*functionId == TSDB_FUNC_PRJ || */functionId == TSDB_FUNC_ARITHM) || (tscIsProjectionQueryOnSTable(pQueryInfo, 0))) {  // column projection query
H
hzcheng 已提交
1149 1150 1151
    ret = 1;                                                            // disable merge procedure
  } else {
    tOrderDescriptor *pDesc = pLocalReducer->pDesc;
H
Haojun Liao 已提交
1152
    if (pDesc->orderInfo.numOfCols > 0) {
1153
      if (pDesc->tsOrder == TSDB_ORDER_ASC) {  // asc
H
hzcheng 已提交
1154
        // todo refactor comparator
S
slguan 已提交
1155
        ret = compare_a(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data);
H
hzcheng 已提交
1156
      } else {  // desc
S
slguan 已提交
1157
        ret = compare_d(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data);
H
hzcheng 已提交
1158 1159 1160 1161 1162 1163 1164 1165
      }
    }
  }

  /* if ret == 0, means the result belongs to the same group */
  return (ret == 0);
}

H
hjxilinx 已提交
1166
/* True when a non-negative slimit is set and the number of result groups has reached it. */
static bool reachGroupResultLimit(SQueryInfo *pQueryInfo, SSqlRes *pRes) {
  if (pQueryInfo->slimit.limit < 0) {
    return false;  // negative value: the number of groups is not limited
  }

  return pRes->numOfGroups >= pQueryInfo->slimit.limit;
}

/*
 * Close the current result group: the group counter is increased when the group has produced at
 * least one row. Returns true if the number of output groups has reached the slimit clause.
 */
static bool saveGroupResultInfo(SSqlObj *pSql) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  if (pRes->numOfRowsGroup > 0) {
    pRes->numOfGroups += 1;
  }

  // the output group is limited by the slimit clause
  return reachGroupResultLimit(pQueryInfo, pRes) ? true : false;
}

S
slguan 已提交
1192 1193 1194 1195 1196 1197 1198
/**
 *
 * @param pSql
 * @param pLocalReducer
 * @param noMoreCurrentGroupRes
 * @return if current group is skipped, return false, and do NOT record it into pRes->numOfGroups
 */
1199
bool genFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool noMoreCurrentGroupRes) {
H
hjxilinx 已提交
1200 1201 1202 1203 1204
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SQueryInfo *  pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  tFilePage *   pResBuf = pLocalReducer->pResultBuf;
H
hjxilinx 已提交
1205
  SColumnModel *pModel = pLocalReducer->resColModel;
H
hzcheng 已提交
1206

S
slguan 已提交
1207 1208 1209
  pRes->code = TSDB_CODE_SUCCESS;

  /*
1210
   * Ignore the output of the current group since this group is skipped by user
S
slguan 已提交
1211 1212
   * We set the numOfRows to be 0 and discard the possible remain results.
   */
1213
  if (pQueryInfo->slimit.offset > 0) {
S
slguan 已提交
1214
    pRes->numOfRows = 0;
1215
    pQueryInfo->slimit.offset -= 1;
S
slguan 已提交
1216
    pLocalReducer->discard = !noMoreCurrentGroupRes;
H
Haojun Liao 已提交
1217 1218 1219 1220 1221 1222

    if (pLocalReducer->discard) {
      SColumnModel *pInternModel = pLocalReducer->pDesc->pColumnModel;
      tColModelAppend(pInternModel, pLocalReducer->discardData, pLocalReducer->pTempBuffer->data, 0, 1, 1);
    }

S
slguan 已提交
1223 1224 1225
    return false;
  }

H
hjxilinx 已提交
1226
  tColModelCompact(pModel, pResBuf, pModel->capacity);
H
hzcheng 已提交
1227 1228 1229

#ifdef _DEBUG_VIEW
  printf("final result before interpo:\n");
1230
//  tColModelDisplay(pLocalReducer->resColModel, pLocalReducer->pBufForInterpo, pResBuf->num, pResBuf->num);
H
hzcheng 已提交
1231
#endif
1232 1233

  // no interval query, no fill operation
1234
  if (pQueryInfo->interval.interval == 0 || pQueryInfo->fillType == TSDB_FILL_NONE) {
1235 1236 1237 1238
    genFinalResWithoutFill(pRes, pLocalReducer, pQueryInfo);
  } else {
    SFillInfo* pFillInfo = pLocalReducer->pFillInfo;
    if (pFillInfo != NULL) {
1239 1240 1241
      TSKEY ekey = (pQueryInfo->order.order == TSDB_ORDER_ASC)? pQueryInfo->window.ekey: pQueryInfo->window.skey;

      taosFillSetStartInfo(pFillInfo, (int32_t)pResBuf->num, ekey);
1242 1243 1244 1245 1246 1247
      taosFillCopyInputDataFromOneFilePage(pFillInfo, pResBuf);
    }
    
    doFillResult(pSql, pLocalReducer, noMoreCurrentGroupRes);
  }

S
slguan 已提交
1248
  return true;
H
hzcheng 已提交
1249 1250
}

H
hjxilinx 已提交
1251
/*
 * Reset the output buffer to the beginning: the result buffer (header included) is zeroed and the
 * output pointer of every function context is moved back to the start of its column inside the
 * result buffer.
 */
void resetOutputBuf(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer) {
  memset(pLocalReducer->pResultBuf, 0, pLocalReducer->nResultBufSize + sizeof(tFilePage));

  int32_t col = 0;
  while (col < pQueryInfo->fieldsInfo.numOfOutput) {
    // columns are laid out one after another, each one capacity rows long
    pLocalReducer->pCtx[col].aOutputBuf =
        pLocalReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, col) * pLocalReducer->resColModel->capacity;
    ++col;
  }
}

S
slguan 已提交
1260
/*
 * Prepare the environment for the data of a new group: the row counters of the result are
 * cleared, the limit offset of the query is restored from the reducer and, when a fill type is
 * set, the fill information is reset to the truncated start time of the query window.
 */
static void resetEnvForNewResultset(SSqlRes *pRes, SSqlCmd *pCmd, SLocalReducer *pLocalReducer) {
  pRes->numOfRows = 0;
  pRes->numOfRowsGroup = 0;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  pQueryInfo->limit.offset = pLocalReducer->offset;

  STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0);
  STableComInfo   tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);

  if (pQueryInfo->fillType == TSDB_FILL_NONE) {
    return;
  }

  // for group result interpolation, do not return if not data is generated
  TSKEY skey = pQueryInfo->window.ekey;
  if (pQueryInfo->order.order == TSDB_ORDER_ASC) {
    skey = pQueryInfo->window.skey;
  }

  int64_t newTime = taosTimeTruncate(skey, &pQueryInfo->interval, tinfo.precision);
  taosResetFillInfo(pLocalReducer->pFillInfo, newTime);
}

S
slguan 已提交
1280 1281 1282 1283
/* True when the number of completed input sources equals the number of input buffers. */
static bool isAllSourcesCompleted(SLocalReducer *pLocalReducer) {
  bool allDone = (pLocalReducer->numOfCompleted == pLocalReducer->numOfBuffer);
  return allDone;
}

1284
/*
 * Continue to generate filled rows for the current group when the fill descriptor still has
 * remaining rows. Returns true in that case (whether or not any row was produced), false when
 * there is no fill descriptor or it has no remaining rows.
 */
static bool doBuildFilledResultForGroup(SSqlObj *pSql) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SQueryInfo    *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SFillInfo     *pFillInfo = pLocalReducer->pFillInfo;

  if (pFillInfo == NULL || taosNumOfRemainRows(pFillInfo) <= 0) {
    return false;
  }

  assert(pQueryInfo->fillType != TSDB_FILL_NONE);

  // the first column must be the timestamp column: read the timestamp of the last input row
  tFilePage *pFinalDataBuf = pLocalReducer->pResultBuf;
  int64_t    etime = *(int64_t *)(pFinalDataBuf->data + TSDB_KEYSIZE * (pFillInfo->numOfRows - 1));

  int32_t rows = (int32_t)getFilledNumOfRes(pFillInfo, etime, pLocalReducer->resColModel->capacity);
  if (rows > 0) {  // do fill gap
    doFillResult(pSql, pLocalReducer, false);
  }

  return true;
}
H
hzcheng 已提交
1309

S
slguan 已提交
1310 1311 1312 1313
/*
 * Handle the tail of a group: when the input is exhausted (or there is no first data source, or
 * the previous group is completed), generate the remaining filled rows if a fill type is set and
 * the limit permits it, then either report completion or set the environment up for a new group.
 *
 * Returns true when the caller can return the current result right away, false when the merge
 * has to go on.
 */
static bool doHandleLastRemainData(SSqlObj *pSql) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SFillInfo     *pFillInfo = pLocalReducer->pFillInfo;

  bool prevGroupCompleted = (!pLocalReducer->discard) && pLocalReducer->hasUnprocessedRow;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  bool inputDrained = isAllSourcesCompleted(pLocalReducer) && !pLocalReducer->hasPrevRow;
  if (!(inputDrained || pLocalReducer->pLocalDataSrc[0] == NULL || prevGroupCompleted)) {
    return false;
  }

  // if fillType == TSDB_FILL_NONE, no more rows can be generated here
  bool belowLimit = (pRes->numOfRowsGroup < pQueryInfo->limit.limit && pQueryInfo->limit.limit > 0) ||
                    (pQueryInfo->limit.limit < 0);
  if (pQueryInfo->fillType != TSDB_FILL_NONE && belowLimit) {
    int64_t etime = (pQueryInfo->order.order == TSDB_ORDER_ASC)? pQueryInfo->window.ekey : pQueryInfo->window.skey;

    int32_t rows = (int32_t)getFilledNumOfRes(pFillInfo, etime, pLocalReducer->resColModel->capacity);
    if (rows > 0) {
      doFillResult(pSql, pLocalReducer, true);
    }
  }

  /*
   * 1. numOfRows == 0, means no interpolation results are generated.
   * 2. if all local data sources are consumed, and no un-processed rows exist.
   *
   * No results will be generated and query completed.
   */
  if (pRes->numOfRows > 0 || (isAllSourcesCompleted(pLocalReducer) && (!pLocalReducer->hasUnprocessedRow))) {
    return true;
  }

  // start to process result for a new group and save the result info of previous group
  if (saveGroupResultInfo(pSql)) {
    return true;
  }

  resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
  return false;
}
H
hzcheng 已提交
1354

H
hjxilinx 已提交
1355 1356 1357 1358
/*
 * Move the output position of every function context forward by numOfRes rows and start the
 * secondary merge, with function initialisation, at the new position.
 */
static void doProcessResultInNextWindow(SSqlObj *pSql, int32_t numOfRes) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SQueryInfo    *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t         numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    SQLFunctionCtx *pFuncCtx = &pLocalReducer->pCtx[idx];
    pFuncCtx->aOutputBuf += pFuncCtx->outputBytes * numOfRes;

    // set the correct output timestamp column position
    bool isTopBottom = (pFuncCtx->functionId == TSDB_FUNC_TOP || pFuncCtx->functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pFuncCtx->ptsOutputBuf = ((char *)pFuncCtx->ptsOutputBuf + TSDB_KEYSIZE * numOfRes);
    }
  }

  doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
}

1376
int32_t tscDoLocalMerge(SSqlObj *pSql) {
S
slguan 已提交
1377 1378
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;
H
hjxilinx 已提交
1379

H
hjxilinx 已提交
1380
  tscResetForNextRetrieve(pRes);
H
hjxilinx 已提交
1381

S
slguan 已提交
1382
  if (pSql->signature != pSql || pRes == NULL || pRes->pLocalReducer == NULL) {  // all data has been processed
H
Haojun Liao 已提交
1383 1384
    tscError("%p local merge abort due to error occurs, code:%s", pSql, tstrerror(pRes->code));
    return pRes->code;
H
hzcheng 已提交
1385
  }
H
hjxilinx 已提交
1386

S
slguan 已提交
1387
  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
H
Haojun Liao 已提交
1388
  SQueryInfo    *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
H
hjxilinx 已提交
1389

S
slguan 已提交
1390 1391
  // set the data merge in progress
  int32_t prevStatus =
weixin_48148422's avatar
weixin_48148422 已提交
1392
      atomic_val_compare_exchange_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY, TSC_LOCALREDUCE_IN_PROGRESS);
H
hjxilinx 已提交
1393
  if (prevStatus != TSC_LOCALREDUCE_READY) {
H
hjxilinx 已提交
1394
    assert(prevStatus == TSC_LOCALREDUCE_TOBE_FREED);  // it is in tscDestroyLocalReducer function already
S
slguan 已提交
1395 1396 1397 1398 1399 1400 1401 1402 1403 1404
    return TSDB_CODE_SUCCESS;
  }

  tFilePage *tmpBuffer = pLocalReducer->pTempBuffer;

  if (doHandleLastRemainData(pSql)) {
    pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
    return TSDB_CODE_SUCCESS;
  }

1405
  if (doBuildFilledResultForGroup(pSql)) {
S
slguan 已提交
1406 1407 1408 1409
    pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
    return TSDB_CODE_SUCCESS;
  }

H
hzcheng 已提交
1410 1411 1412
  SLoserTreeInfo *pTree = pLocalReducer->pLoserTree;

  // clear buffer
S
slguan 已提交
1413
  handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hjxilinx 已提交
1414
  SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel;
H
hzcheng 已提交
1415 1416

  while (1) {
S
slguan 已提交
1417
    if (isAllSourcesCompleted(pLocalReducer)) {
H
hzcheng 已提交
1418 1419 1420 1421 1422 1423
      break;
    }

#ifdef _DEBUG_VIEW
    printf("chosen data in pTree[0] = %d\n", pTree->pNode[0].index);
#endif
1424
    assert((pTree->pNode[0].index < pLocalReducer->numOfBuffer) && (pTree->pNode[0].index >= 0) && tmpBuffer->num == 0);
H
hzcheng 已提交
1425 1426

    // chosen from loser tree
S
slguan 已提交
1427
    SLocalDataSource *pOneDataSrc = pLocalReducer->pLocalDataSrc[pTree->pNode[0].index];
H
hzcheng 已提交
1428

S
slguan 已提交
1429
    tColModelAppend(pModel, tmpBuffer, pOneDataSrc->filePage.data, pOneDataSrc->rowIdx, 1,
H
hjxilinx 已提交
1430
                    pOneDataSrc->pMemBuffer->pColumnModel->capacity);
H
hzcheng 已提交
1431 1432 1433 1434

#if defined(_DEBUG_VIEW)
    printf("chosen row:\t");
    SSrcColumnInfo colInfo[256] = {0};
1435
    tscGetSrcColumnInfo(colInfo, pQueryInfo);
H
hzcheng 已提交
1436

1437
    tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->num, pModel->capacity, colInfo);
H
hzcheng 已提交
1438
#endif
S
slguan 已提交
1439

H
hzcheng 已提交
1440 1441 1442 1443
    if (pLocalReducer->discard) {
      assert(pLocalReducer->hasUnprocessedRow == false);

      /* current record belongs to the same group of previous record, need to discard it */
S
slguan 已提交
1444
      if (isSameGroup(pCmd, pLocalReducer, pLocalReducer->discardData->data, tmpBuffer)) {
1445
        tmpBuffer->num = 0;
H
hzcheng 已提交
1446 1447
        pOneDataSrc->rowIdx += 1;

S
slguan 已提交
1448 1449 1450 1451
        adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);

        // all inputs are exhausted, abort current process
        if (isAllSourcesCompleted(pLocalReducer)) {
H
hzcheng 已提交
1452 1453 1454
          break;
        }

S
slguan 已提交
1455
        // data belongs to the same group needs to be discarded
H
hzcheng 已提交
1456 1457 1458
        continue;
      } else {
        pLocalReducer->discard = false;
1459
        pLocalReducer->discardData->num = 0;
H
hzcheng 已提交
1460

S
slguan 已提交
1461 1462 1463 1464 1465 1466
        if (saveGroupResultInfo(pSql)) {
          pLocalReducer->status = TSC_LOCALREDUCE_READY;
          return TSDB_CODE_SUCCESS;
        }

        resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
H
hzcheng 已提交
1467 1468 1469 1470
      }
    }

    if (pLocalReducer->hasPrevRow) {
1471
      if (needToMerge(pQueryInfo, pLocalReducer, tmpBuffer)) {
S
slguan 已提交
1472
        // belong to the group of the previous row, continue process it
S
slguan 已提交
1473
        doExecuteSecondaryMerge(pCmd, pLocalReducer, false);
H
hzcheng 已提交
1474 1475

        // copy to buffer
S
slguan 已提交
1476 1477 1478 1479 1480 1481
        savePreviousRow(pLocalReducer, tmpBuffer);
      } else {
        /*
         * current row does not belong to the group of previous row.
         * so the processing of previous group is completed.
         */
1482
        int32_t numOfRes = finalizeRes(pQueryInfo, pLocalReducer);
H
Haojun Liao 已提交
1483
        bool   sameGroup = isSameGroup(pCmd, pLocalReducer, pLocalReducer->prevRowOfInput, tmpBuffer);
H
hzcheng 已提交
1484 1485 1486 1487

        tFilePage *pResBuf = pLocalReducer->pResultBuf;

        /*
1488
         * if the previous group does NOT generate any result (pResBuf->num == 0),
H
hzcheng 已提交
1489 1490
         * continue to process results instead of return results.
         */
1491
        if ((!sameGroup && pResBuf->num > 0) || (pResBuf->num == pLocalReducer->resColModel->capacity)) {
H
hzcheng 已提交
1492
          // does not belong to the same group
1493
          bool notSkipped = genFinalResults(pSql, pLocalReducer, !sameGroup);
H
hzcheng 已提交
1494

S
slguan 已提交
1495
          // this row needs to discard, since it belongs to the group of previous
H
hzcheng 已提交
1496 1497
          if (pLocalReducer->discard && sameGroup) {
            pLocalReducer->hasUnprocessedRow = false;
1498
            tmpBuffer->num = 0;
H
hzcheng 已提交
1499
          } else {
S
slguan 已提交
1500
            // current row does not belongs to the previous group, so it is not be handled yet.
H
hzcheng 已提交
1501 1502 1503
            pLocalReducer->hasUnprocessedRow = true;
          }

1504
          resetOutputBuf(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1505 1506
          pOneDataSrc->rowIdx += 1;

S
slguan 已提交
1507 1508
          // here we do not check the return value
          adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);
H
hzcheng 已提交
1509 1510 1511
          assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS);

          if (pRes->numOfRows == 0) {
S
slguan 已提交
1512
            handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1513 1514

            if (!sameGroup) {
S
slguan 已提交
1515 1516 1517 1518 1519 1520 1521 1522 1523 1524
              /*
               * previous group is done, prepare for the next group
               * If previous group is not skipped, keep it in pRes->numOfGroups
               */
              if (notSkipped && saveGroupResultInfo(pSql)) {
                pLocalReducer->status = TSC_LOCALREDUCE_READY;
                return TSDB_CODE_SUCCESS;
              }

              resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
H
hzcheng 已提交
1525 1526 1527 1528 1529 1530 1531
            }
          } else {
            /*
             * if next record belongs to a new group, we do not handle this record here.
             * We start the process in a new round.
             */
            if (sameGroup) {
S
slguan 已提交
1532
              handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1533 1534 1535
            }
          }

S
slguan 已提交
1536 1537 1538 1539 1540 1541
          // current group has no result,
          if (pRes->numOfRows == 0) {
            continue;
          } else {
            pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
            return TSDB_CODE_SUCCESS;
H
hzcheng 已提交
1542
          }
S
slguan 已提交
1543
        } else {  // result buffer is not full
H
hjxilinx 已提交
1544
          doProcessResultInNextWindow(pSql, numOfRes);
S
slguan 已提交
1545
          savePreviousRow(pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1546 1547
        }
      }
S
slguan 已提交
1548
    } else {
S
slguan 已提交
1549
      doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
S
slguan 已提交
1550
      savePreviousRow(pLocalReducer, tmpBuffer);  // copy the processed row to buffer
H
hzcheng 已提交
1551 1552 1553
    }

    pOneDataSrc->rowIdx += 1;
S
slguan 已提交
1554
    adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);
H
hzcheng 已提交
1555 1556 1557
  }

  if (pLocalReducer->hasPrevRow) {
1558
    finalizeRes(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1559 1560
  }

1561
  if (pLocalReducer->pResultBuf->num) {
1562
    genFinalResults(pSql, pLocalReducer, true);
H
hzcheng 已提交
1563 1564 1565
  }

  assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS && pRes->row == 0);
S
slguan 已提交
1566
  pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
H
hzcheng 已提交
1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584

  return TSDB_CODE_SUCCESS;
}

/*
 * Prepare the result object of pObj for a query whose result is produced locally.
 *
 * Any local reducer already attached to the result object is destroyed first, the
 * row cursor fields are reset, and a fresh SLocalReducer with a zero-filled result
 * buffer large enough for numOfRes rows of rowLen bytes is attached.
 *
 * pObj      the sql object whose result (pObj->res) is initialized
 * numOfRes  number of result rows; stored in pResultBuf->num
 * rowLen    length of one result row in bytes
 *
 * If the requested size is invalid (negative or not representable in size_t) or a
 * memory allocation fails, nothing is attached: pRes->pLocalReducer and pRes->data
 * are both set to NULL so that callers can detect the failure instead of crashing
 * on a NULL dereference.
 */
void tscInitResObjForLocalQuery(SSqlObj *pObj, int32_t numOfRes, int32_t rowLen) {
  SSqlRes *pRes = &pObj->res;
  if (pRes->pLocalReducer != NULL) {
    tscDestroyLocalReducer(pObj);
  }

  pRes->qhandle = 1;  // hack to pass the safety check in fetch_row function
  pRes->numOfRows = 0;
  pRes->row = 0;

  pRes->rspType = 0;  // used as a flag to denote if taos_retrieved() has been called yet

  // nothing is attached until every allocation has succeeded
  pRes->pLocalReducer = NULL;
  pRes->data = NULL;

  if (numOfRes < 0 || rowLen < 0) {
    return;
  }

  /*
   * we need one additional byte space
   * the sprintf function needs one additional space to put '\0' at the end of string
   */
  const size_t extraSize = sizeof(tFilePage) + 1;

  // the product of two int32_t values always fits in int64_t, so no signed overflow here
  int64_t payloadSize = (int64_t)numOfRes * (int64_t)rowLen;
  if ((uint64_t)payloadSize > (uint64_t)(SIZE_MAX - extraSize)) {
    return;  // the requested buffer cannot be represented in size_t
  }

  size_t allocSize = (size_t)payloadSize + extraSize;

  SLocalReducer *pLocalReducer = (SLocalReducer *)calloc(1, sizeof(SLocalReducer));
  if (pLocalReducer == NULL) {
    return;
  }

  pLocalReducer->pResultBuf = (tFilePage *)calloc(1, allocSize);
  if (pLocalReducer->pResultBuf == NULL) {
    free(pLocalReducer);  // do not leak the reducer when the result buffer cannot be allocated
    return;
  }

  pLocalReducer->pResultBuf->num = numOfRes;

  pRes->pLocalReducer = pLocalReducer;
  pRes->data = pLocalReducer->pResultBuf->data;
}