未验证 提交 e62e125d 编写于 作者: D dapan1121 提交者: GitHub

Merge pull request #20569 from taosdata/feat/mode_opt

enh(query): improve mode function page buffer allocation
...@@ -32,9 +32,6 @@ ...@@ -32,9 +32,6 @@
#define TAIL_MAX_POINTS_NUM 100 #define TAIL_MAX_POINTS_NUM 100
#define TAIL_MAX_OFFSET 100 #define TAIL_MAX_OFFSET 100
#define UNIQUE_MAX_RESULT_SIZE (1024 * 1024 * 10)
#define MODE_MAX_RESULT_SIZE UNIQUE_MAX_RESULT_SIZE
#define HLL_BUCKET_BITS 14 // The bits of the bucket #define HLL_BUCKET_BITS 14 // The bits of the bucket
#define HLL_DATA_BITS (64 - HLL_BUCKET_BITS) #define HLL_DATA_BITS (64 - HLL_BUCKET_BITS)
#define HLL_BUCKETS (1 << HLL_BUCKET_BITS) #define HLL_BUCKETS (1 << HLL_BUCKET_BITS)
...@@ -244,12 +241,11 @@ typedef struct SUniqueInfo { ...@@ -244,12 +241,11 @@ typedef struct SUniqueInfo {
typedef struct SModeItem { typedef struct SModeItem {
int64_t count; int64_t count;
STuplePos dataPos;
STuplePos tuplePos; STuplePos tuplePos;
char data[];
} SModeItem; } SModeItem;
typedef struct SModeInfo { typedef struct SModeInfo {
int32_t numOfPoints;
uint8_t colType; uint8_t colType;
int16_t colBytes; int16_t colBytes;
SHashObj* pHash; SHashObj* pHash;
...@@ -257,7 +253,7 @@ typedef struct SModeInfo { ...@@ -257,7 +253,7 @@ typedef struct SModeInfo {
STuplePos nullTuplePos; STuplePos nullTuplePos;
bool nullTupleSaved; bool nullTupleSaved;
char pItems[]; char* buf; // serialize data buffer
} SModeInfo; } SModeInfo;
typedef struct SDerivInfo { typedef struct SDerivInfo {
...@@ -3113,7 +3109,7 @@ void* serializeTupleData(const SSDataBlock* pSrcBlock, int32_t rowIndex, SSubsid ...@@ -3113,7 +3109,7 @@ void* serializeTupleData(const SSDataBlock* pSrcBlock, int32_t rowIndex, SSubsid
return buf; return buf;
} }
static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, size_t length, STupleKey key, static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, size_t length, STupleKey* key,
STuplePos* pPos) { STuplePos* pPos) {
STuplePos p = {0}; STuplePos p = {0};
if (pHandle->pBuf != NULL) { if (pHandle->pBuf != NULL) {
...@@ -3149,8 +3145,8 @@ static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, ...@@ -3149,8 +3145,8 @@ static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf,
releaseBufPage(pHandle->pBuf, pPage); releaseBufPage(pHandle->pBuf, pPage);
} else { } else {
// other tuple save policy // other tuple save policy
if (streamStateFuncPut(pHandle->pState, &key, pBuf, length) >= 0) { if (streamStateFuncPut(pHandle->pState, key, pBuf, length) >= 0) {
p.streamTupleKey = key; p.streamTupleKey = *key;
} }
} }
...@@ -3174,7 +3170,7 @@ int32_t saveTupleData(SqlFunctionCtx* pCtx, int32_t rowIndex, const SSDataBlock* ...@@ -3174,7 +3170,7 @@ int32_t saveTupleData(SqlFunctionCtx* pCtx, int32_t rowIndex, const SSDataBlock*
} }
char* buf = serializeTupleData(pSrcBlock, rowIndex, &pCtx->subsidiaries, pCtx->subsidiaries.buf); char* buf = serializeTupleData(pSrcBlock, rowIndex, &pCtx->subsidiaries, pCtx->subsidiaries.buf);
return doSaveTupleData(&pCtx->saveHandle, buf, pCtx->subsidiaries.rowLen, key, pPos); return doSaveTupleData(&pCtx->saveHandle, buf, pCtx->subsidiaries.rowLen, &key, pPos);
} }
static int32_t doUpdateTupleData(SSerializeDataHandle* pHandle, const void* pBuf, size_t length, STuplePos* pPos) { static int32_t doUpdateTupleData(SSerializeDataHandle* pHandle, const void* pBuf, size_t length, STuplePos* pPos) {
...@@ -4949,7 +4945,7 @@ int32_t uniqueFunction(SqlFunctionCtx* pCtx) { ...@@ -4949,7 +4945,7 @@ int32_t uniqueFunction(SqlFunctionCtx* pCtx) {
} }
bool getModeFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) { bool getModeFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) {
pEnv->calcMemSize = sizeof(SModeInfo) + MODE_MAX_RESULT_SIZE; pEnv->calcMemSize = sizeof(SModeInfo);
return true; return true;
} }
...@@ -4959,7 +4955,6 @@ bool modeFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) { ...@@ -4959,7 +4955,6 @@ bool modeFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) {
} }
SModeInfo* pInfo = GET_ROWCELL_INTERBUF(pResInfo); SModeInfo* pInfo = GET_ROWCELL_INTERBUF(pResInfo);
pInfo->numOfPoints = 0;
pInfo->colType = pCtx->resDataInfo.type; pInfo->colType = pCtx->resDataInfo.type;
pInfo->colBytes = pCtx->resDataInfo.bytes; pInfo->colBytes = pCtx->resDataInfo.bytes;
if (pInfo->pHash != NULL) { if (pInfo->pHash != NULL) {
...@@ -4970,38 +4965,60 @@ bool modeFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) { ...@@ -4970,38 +4965,60 @@ bool modeFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) {
pInfo->nullTupleSaved = false; pInfo->nullTupleSaved = false;
pInfo->nullTuplePos.pageId = -1; pInfo->nullTuplePos.pageId = -1;
pInfo->buf = taosMemoryMalloc(pInfo->colBytes);
return true; return true;
} }
static void modeFunctionCleanup(SModeInfo * pInfo) {
taosHashCleanup(pInfo->pHash);
taosMemoryFreeClear(pInfo->buf);
}
static int32_t saveModeTupleData(SqlFunctionCtx* pCtx, char* data, SModeInfo *pInfo, STuplePos* pPos) {
if (IS_VAR_DATA_TYPE(pInfo->colType)) {
memcpy(pInfo->buf, data, varDataTLen(data));
} else {
memcpy(pInfo->buf, data, pInfo->colBytes);
}
return doSaveTupleData(&pCtx->saveHandle, pInfo->buf, pInfo->colBytes, NULL, pPos);
}
static int32_t doModeAdd(SModeInfo* pInfo, int32_t rowIndex, SqlFunctionCtx* pCtx, char* data) { static int32_t doModeAdd(SModeInfo* pInfo, int32_t rowIndex, SqlFunctionCtx* pCtx, char* data) {
int32_t hashKeyBytes = IS_STR_DATA_TYPE(pInfo->colType) ? varDataTLen(data) : pInfo->colBytes; int32_t code = TSDB_CODE_SUCCESS;
SModeItem** pHashItem = taosHashGet(pInfo->pHash, data, hashKeyBytes); int32_t hashKeyBytes = IS_STR_DATA_TYPE(pInfo->colType) ? varDataTLen(data) : pInfo->colBytes;
SModeItem* pHashItem = (SModeItem *)taosHashGet(pInfo->pHash, data, hashKeyBytes);
if (pHashItem == NULL) { if (pHashItem == NULL) {
int32_t size = sizeof(SModeItem) + pInfo->colBytes; int32_t size = sizeof(SModeItem);
SModeItem* pItem = (SModeItem*)(pInfo->pItems + pInfo->numOfPoints * size); SModeItem item = {0};
memcpy(pItem->data, data, hashKeyBytes);
pItem->count += 1; item.count += 1;
code = saveModeTupleData(pCtx, data, pInfo, &item.dataPos);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
if (pCtx->subsidiaries.num > 0) { if (pCtx->subsidiaries.num > 0) {
int32_t code = saveTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &pItem->tuplePos); code = saveTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &item.tuplePos);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
return code; return code;
} }
} }
taosHashPut(pInfo->pHash, data, hashKeyBytes, &pItem, sizeof(SModeItem*)); taosHashPut(pInfo->pHash, data, hashKeyBytes, &item, sizeof(SModeItem));
pInfo->numOfPoints++;
} else { } else {
(*pHashItem)->count += 1; pHashItem->count += 1;
if (pCtx->subsidiaries.num > 0) { if (pCtx->subsidiaries.num > 0) {
int32_t code = updateTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &((*pHashItem)->tuplePos)); int32_t code = updateTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &pHashItem->tuplePos);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
return code; return code;
} }
} }
} }
return TSDB_CODE_SUCCESS; return code;
} }
int32_t modeFunction(SqlFunctionCtx* pCtx) { int32_t modeFunction(SqlFunctionCtx* pCtx) {
...@@ -5024,18 +5041,15 @@ int32_t modeFunction(SqlFunctionCtx* pCtx) { ...@@ -5024,18 +5041,15 @@ int32_t modeFunction(SqlFunctionCtx* pCtx) {
char* data = colDataGetData(pInputCol, i); char* data = colDataGetData(pInputCol, i);
int32_t code = doModeAdd(pInfo, i, pCtx, data); int32_t code = doModeAdd(pInfo, i, pCtx, data);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
modeFunctionCleanup(pInfo);
return code; return code;
} }
if (sizeof(SModeInfo) + pInfo->numOfPoints * (sizeof(SModeItem) + pInfo->colBytes) >= MODE_MAX_RESULT_SIZE) {
taosHashCleanup(pInfo->pHash);
return TSDB_CODE_OUT_OF_MEMORY;
}
} }
if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pInfo->nullTupleSaved) { if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pInfo->nullTupleSaved) {
int32_t code = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pInfo->nullTuplePos); int32_t code = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pInfo->nullTuplePos);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
modeFunctionCleanup(pInfo);
return code; return code;
} }
pInfo->nullTupleSaved = true; pInfo->nullTupleSaved = true;
...@@ -5054,26 +5068,37 @@ int32_t modeFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { ...@@ -5054,26 +5068,37 @@ int32_t modeFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId);
int32_t currentRow = pBlock->info.rows; int32_t currentRow = pBlock->info.rows;
int32_t resIndex = -1; STuplePos resDataPos, resTuplePos;
int32_t maxCount = 0; int32_t maxCount = 0;
for (int32_t i = 0; i < pInfo->numOfPoints; ++i) {
SModeItem* pItem = (SModeItem*)(pInfo->pItems + i * (sizeof(SModeItem) + pInfo->colBytes)); void *pIter = taosHashIterate(pInfo->pHash, NULL);
while (pIter != NULL) {
SModeItem *pItem = (SModeItem *)pIter;
if (pItem->count >= maxCount) { if (pItem->count >= maxCount) {
maxCount = pItem->count; maxCount = pItem->count;
resIndex = i; resDataPos = pItem->dataPos;
resTuplePos = pItem->tuplePos;
} }
pIter = taosHashIterate(pInfo->pHash, pIter);
} }
if (maxCount != 0) { if (maxCount != 0) {
SModeItem* pResItem = (SModeItem*)(pInfo->pItems + resIndex * (sizeof(SModeItem) + pInfo->colBytes)); const char* pData = loadTupleData(pCtx, &resDataPos);
colDataSetVal(pCol, currentRow, pResItem->data, false); if (pData == NULL) {
code = setSelectivityValue(pCtx, pBlock, &pResItem->tuplePos, currentRow); code = TSDB_CODE_NO_AVAIL_DISK;
modeFunctionCleanup(pInfo);
return code;
}
colDataSetVal(pCol, currentRow, pData, false);
code = setSelectivityValue(pCtx, pBlock, &resTuplePos, currentRow);
} else { } else {
colDataSetNULL(pCol, currentRow); colDataSetNULL(pCol, currentRow);
code = setSelectivityValue(pCtx, pBlock, &pInfo->nullTuplePos, currentRow); code = setSelectivityValue(pCtx, pBlock, &pInfo->nullTuplePos, currentRow);
} }
taosHashCleanup(pInfo->pHash); modeFunctionCleanup(pInfo);
return code; return code;
} }
......
...@@ -53,6 +53,7 @@ class TDTestCase: ...@@ -53,6 +53,7 @@ class TDTestCase:
( '2022-02-01 01:01:20.000', 6, 66666, 666, 66, 6.66, 66.66, 1, "binary6", "nchar6", now()+6a ) ( '2022-02-01 01:01:20.000', 6, 66666, 666, 66, 6.66, 66.66, 1, "binary6", "nchar6", now()+6a )
( '2022-10-28 01:01:26.000', 7, 00000, 000, 00, 0.00, 00.00, 1, "binary7", "nchar7", "1970-01-01 08:00:00.000" ) ( '2022-10-28 01:01:26.000', 7, 00000, 000, 00, 0.00, 00.00, 1, "binary7", "nchar7", "1970-01-01 08:00:00.000" )
( '2022-12-01 01:01:30.000', 8, -88888, -888, -88, -8.88, -88.88, 0, "binary8", "nchar8", "1969-01-01 01:00:00.000" ) ( '2022-12-01 01:01:30.000', 8, -88888, -888, -88, -8.88, -88.88, 0, "binary8", "nchar8", "1969-01-01 01:00:00.000" )
( '2022-12-31 01:01:32.000', 9, -99999999999999999, -999, -99, -9.99, -999999999999999999999.99, 1, "binary9", "nchar9", "1900-01-01 00:00:00.000" )
( '2022-12-31 01:01:36.000', 9, -99999999999999999, -999, -99, -9.99, -999999999999999999999.99, 1, "binary9", "nchar9", "1900-01-01 00:00:00.000" ) ( '2022-12-31 01:01:36.000', 9, -99999999999999999, -999, -99, -9.99, -999999999999999999999.99, 1, "binary9", "nchar9", "1900-01-01 00:00:00.000" )
( '2023-02-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ( '2023-02-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
''' '''
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册