diff --git a/include/libs/function/function.h b/include/libs/function/function.h index 6f2a67546658228615ebe20eade9e626e9a13ed7..240772bfc27101adac022f3cb2a90bfd0fc55b7b 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -115,7 +115,7 @@ typedef struct SInputColumnInfoData { int32_t startRowIndex; // handle started row index int32_t numOfRows; // the number of rows needs to be handled int32_t numOfInputCols; // PTS is not included - bool colDataAggIsSet; // if agg is set or not + bool colDataSMAIsSet; // if agg is set or not SColumnInfoData *pPTS; // primary timestamp column SColumnInfoData **pData; SColumnDataAgg **pColumnDataAgg; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index f16462268755d5e4936987f83d58d697bcdb5d92..5031d75231557270abccf745c3686c7d18df3502 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -349,13 +349,13 @@ typedef struct { } SFunctionCtxStatus; static void functionCtxSave(SqlFunctionCtx* pCtx, SFunctionCtxStatus* pStatus) { - pStatus->hasAgg = pCtx->input.colDataAggIsSet; + pStatus->hasAgg = pCtx->input.colDataSMAIsSet; pStatus->numOfRows = pCtx->input.numOfRows; pStatus->startOffset = pCtx->input.startRowIndex; } static void functionCtxRestore(SqlFunctionCtx* pCtx, SFunctionCtxStatus* pStatus) { - pCtx->input.colDataAggIsSet = pStatus->hasAgg; + pCtx->input.colDataSMAIsSet = pStatus->hasAgg; pCtx->input.numOfRows = pStatus->numOfRows; pCtx->input.startRowIndex = pStatus->startOffset; } @@ -372,8 +372,8 @@ void doApplyFunctions(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, SColumnInfo // not a whole block involved in query processing, statistics data can not be used // NOTE: the original value of isSet have been changed here - if (pCtx[k].input.colDataAggIsSet && forwardStep < numOfTotal) { - pCtx[k].input.colDataAggIsSet = false; + if (pCtx[k].input.colDataSMAIsSet && forwardStep < numOfTotal) { + 
pCtx[k].input.colDataSMAIsSet = false; } if (fmIsWindowPseudoColumnFunc(pCtx[k].functionId)) { @@ -486,7 +486,7 @@ static int32_t doSetInputDataBlock(SExprSupp* pExprSup, SSDataBlock* pBlock, int SInputColumnInfoData* pInput = &pCtx[i].input; pInput->uid = pBlock->info.uid; - pInput->colDataAggIsSet = false; + pInput->colDataSMAIsSet = false; SExprInfo* pOneExpr = &pExprSup->pExprInfo[i]; for (int32_t j = 0; j < pOneExpr->base.numOfParams; ++j) { @@ -798,7 +798,7 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB pInput->totalRows = numOfRows; if (pBlock->pBlockAgg != NULL) { - pInput->colDataAggIsSet = true; + pInput->colDataSMAIsSet = true; for (int32_t j = 0; j < pExprInfo->base.numOfParams; ++j) { SFunctParam* pFuncParam = &pExprInfo->base.pParam[j]; @@ -807,7 +807,7 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB int32_t slotId = pFuncParam->pCol->slotId; pInput->pColumnDataAgg[j] = pBlock->pBlockAgg[slotId]; if (pInput->pColumnDataAgg[j] == NULL) { - pInput->colDataAggIsSet = false; + pInput->colDataSMAIsSet = false; } // Here we set the column info data since the data type for each column data is required, but @@ -818,7 +818,7 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB } } } else { - pInput->colDataAggIsSet = false; + pInput->colDataSMAIsSet = false; } } diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 26f9c3ad0b33d90f79dfe7d50fdf9783ccdc4991..bf79cb5191e3ca3d9f2017bbf7d9a082823cf7fb 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -498,13 +498,13 @@ static int32_t getNumOfElems(SqlFunctionCtx* pCtx) { int32_t numOfElem = 0; /* - * 1. column data missing (schema modified) causes pInputCol->hasNull == true. pInput->colDataAggIsSet == true; - * 2. 
for general non-primary key columns, pInputCol->hasNull may be true or false, pInput->colDataAggIsSet == true; - * 3. for primary key column, pInputCol->hasNull always be false, pInput->colDataAggIsSet == false; + * 1. column data missing (schema modified) causes pInputCol->hasNull == true. pInput->colDataSMAIsSet == true; + * 2. for general non-primary key columns, pInputCol->hasNull may be true or false, pInput->colDataSMAIsSet == true; + * 3. for primary key column, pInputCol->hasNull always be false, pInput->colDataSMAIsSet == false; */ SInputColumnInfoData* pInput = &pCtx->input; SColumnInfoData* pInputCol = pInput->pData[0]; - if (pInput->colDataAggIsSet && pInput->totalRows == pInput->numOfRows) { + if (pInput->colDataSMAIsSet && pInput->totalRows == pInput->numOfRows) { numOfElem = pInput->numOfRows - pInput->pColumnDataAgg[0]->numOfNull; ASSERT(numOfElem >= 0); } else { @@ -593,7 +593,7 @@ int32_t sumFunction(SqlFunctionCtx* pCtx) { goto _sum_over; } - if (pInput->colDataAggIsSet) { + if (pInput->colDataSMAIsSet) { numOfElem = pInput->numOfRows - pAgg->numOfNull; ASSERT(numOfElem >= 0); @@ -658,7 +658,7 @@ int32_t sumInvertFunction(SqlFunctionCtx* pCtx) { SSumRes* pSumRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); - if (pInput->colDataAggIsSet) { + if (pInput->colDataSMAIsSet) { numOfElem = pInput->numOfRows - pAgg->numOfNull; ASSERT(numOfElem >= 0); @@ -770,7 +770,7 @@ bool getSumFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) { // goto _avg_over; // } // -// if (pInput->colDataAggIsSet) { +// if (pInput->colDataSMAIsSet) { // numOfElem = numOfRows - pAgg->numOfNull; // ASSERT(numOfElem >= 0); // @@ -1161,7 +1161,7 @@ bool getMinmaxFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) { // } // // // data in current data block are qualified to the query -// if (pInput->colDataAggIsSet) { +// if (pInput->colDataSMAIsSet) { // numOfElems = pInput->numOfRows - pAgg->numOfNull; // ASSERT(pInput->numOfRows == pInput->totalRows && 
numOfElems >= 0); // if (numOfElems == 0) { @@ -2471,7 +2471,7 @@ int32_t percentileFunction(SqlFunctionCtx* pCtx) { // the first stage, only acquire the min/max value if (pInfo->stage == 0) { - if (pCtx->input.colDataAggIsSet) { + if (pCtx->input.colDataSMAIsSet) { double tmin = 0.0, tmax = 0.0; if (IS_SIGNED_NUMERIC_TYPE(type)) { tmin = (double)GET_INT64_VAL(&pAgg->min); @@ -2933,14 +2933,14 @@ int32_t firstFunction(SqlFunctionCtx* pCtx) { pInfo->bytes = pInputCol->info.bytes; // All null data column, return directly. - if (pInput->colDataAggIsSet && (pInput->pColumnDataAgg[0]->numOfNull == pInput->totalRows)) { + if (pInput->colDataSMAIsSet && (pInput->pColumnDataAgg[0]->numOfNull == pInput->totalRows)) { ASSERT(pInputCol->hasNull == true); // save selectivity value for column consisted of all null values firstlastSaveTupleData(pCtx->pSrcBlock, pInput->startRowIndex, pCtx, pInfo); return 0; } - SColumnDataAgg* pColAgg = (pInput->colDataAggIsSet) ? pInput->pColumnDataAgg[0] : NULL; + SColumnDataAgg* pColAgg = (pInput->colDataSMAIsSet) ? pInput->pColumnDataAgg[0] : NULL; TSKEY startKey = getRowPTs(pInput->pPTS, 0); TSKEY endKey = getRowPTs(pInput->pPTS, pInput->totalRows - 1); @@ -3037,14 +3037,14 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) { pInfo->bytes = bytes; // All null data column, return directly. - if (pInput->colDataAggIsSet && (pInput->pColumnDataAgg[0]->numOfNull == pInput->totalRows)) { + if (pInput->colDataSMAIsSet && (pInput->pColumnDataAgg[0]->numOfNull == pInput->totalRows)) { ASSERT(pInputCol->hasNull == true); // save selectivity value for column consisted of all null values firstlastSaveTupleData(pCtx->pSrcBlock, pInput->startRowIndex, pCtx, pInfo); return 0; } - SColumnDataAgg* pColAgg = (pInput->colDataAggIsSet) ? pInput->pColumnDataAgg[0] : NULL; + SColumnDataAgg* pColAgg = (pInput->colDataSMAIsSet) ? 
pInput->pColumnDataAgg[0] : NULL; TSKEY startKey = getRowPTs(pInput->pPTS, 0); TSKEY endKey = getRowPTs(pInput->pPTS, pInput->totalRows - 1); @@ -3988,7 +3988,7 @@ int32_t spreadFunction(SqlFunctionCtx* pCtx) { SSpreadInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); - if (pInput->colDataAggIsSet) { + if (pInput->colDataSMAIsSet) { numOfElems = pInput->numOfRows - pAgg->numOfNull; if (numOfElems == 0) { goto _spread_over; @@ -4163,7 +4163,7 @@ int32_t elapsedFunction(SqlFunctionCtx* pCtx) { goto _elapsed_over; } - if (pInput->colDataAggIsSet) { + if (pInput->colDataSMAIsSet) { if (pInfo->min == TSKEY_MAX) { pInfo->min = GET_INT64_VAL(&pAgg->min); pInfo->max = GET_INT64_VAL(&pAgg->max); diff --git a/source/libs/function/src/detail/tavgfunction.c b/source/libs/function/src/detail/tavgfunction.c index 01e0a499eb6e7c2babada5c4845b4739828f0b74..9c3b9cf573c07344e6161e9e57d645b62cf0d6f9 100644 --- a/source/libs/function/src/detail/tavgfunction.c +++ b/source/libs/function/src/detail/tavgfunction.c @@ -48,15 +48,14 @@ typedef struct SAvgRes { int16_t type; // store the original input type, used in merge function } SAvgRes; -static void floatVectorSumAVX(const SInputColumnInfoData* pInput, const float* plist, SAvgRes* pRes) { +static void floatVectorSumAVX(const float* plist, int32_t numOfRows, SAvgRes* pRes) { #if __AVX__ // find the start position that are aligned to 32bytes address in memory - int32_t startIndex = 0; //((uint64_t)plist) & ((1<<8u)-1); int32_t bitWidth = 8; + int32_t remainder = numOfRows % bitWidth; + int32_t rounds = numOfRows / bitWidth; - int32_t remain = (pInput->numOfRows - startIndex) % bitWidth; - int32_t rounds = (pInput->numOfRows - startIndex) / bitWidth; - const float* p = &plist[startIndex]; + const float* p = plist; __m256 val; __m256 sum = _mm256_setzero_ps(); @@ -71,18 +70,126 @@ static void floatVectorSumAVX(const SInputColumnInfoData* pInput, const float* p const float* q = (const float*)&sum; pRes->sum.dsum += q[0] + q[1] + q[2] + 
q[3] + q[4] + q[5] + q[6] + q[7]; - // calculate the front and the reminder items in array list - for (int32_t j = 0; j < startIndex; ++j) { - pRes->sum.dsum += plist[j]; + int32_t startIndex = rounds * bitWidth; + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.dsum += plist[j + startIndex]; } +#endif +} + +static void doubleVectorSumAVX(const double* plist, int32_t numOfRows, SAvgRes* pRes) { +#if __AVX__ + // find the start position that are aligned to 32bytes address in memory + int32_t bitWidth = 4; + int32_t remainder = numOfRows % bitWidth; + int32_t rounds = numOfRows / bitWidth; + + const double* p = plist; + + __m256d val; + __m256d sum = _mm256_setzero_pd(); - startIndex += rounds * bitWidth; - for (int32_t j = 0; j < remain; ++j) { + for (int32_t i = 0; i < rounds; ++i) { + val = _mm256_loadu_pd(p); + sum = _mm256_add_pd(sum, val); + p += bitWidth; + } + + // let sum up the final results + const double* q = (const double*)&sum; + pRes->sum.dsum += q[0] + q[1] + q[2] + q[3]; + + int32_t startIndex = rounds * bitWidth; + for (int32_t j = 0; j < remainder; ++j) { pRes->sum.dsum += plist[j + startIndex]; } #endif } +static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, SAvgRes* pRes) { +#if __AVX2__ + // find the start position that are aligned to 32bytes address in memory + int32_t bitWidth = 16; + int32_t remainder = numOfRows % bitWidth; + int32_t rounds = numOfRows / bitWidth; + + const int8_t* p = plist; + + __m256i sum = _mm256_setzero_si256(); + + for (int32_t i = 0; i < rounds; ++i) { + __m256i val = _mm256_lddqu_si256((__m256i*)p); +// __m256i extVal = _mm256_cvtepi8_epi64(val); + sum = _mm256_add_epi8(sum, val); + p += bitWidth; + } + + // let sum up the final results + const int8_t* q = (const int8_t*)&sum; + pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; + + int32_t startIndex = rounds * bitWidth; + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.isum += plist[j + startIndex]; + } +#endif +} + +static void i32VectorSumAVX2(const 
int32_t* plist, int32_t numOfRows, SAvgRes* pRes) { +#if __AVX2__ + // find the start position that are aligned to 32bytes address in memory + int32_t bitWidth = 8; + int32_t remainder = numOfRows % bitWidth; + int32_t rounds = numOfRows / bitWidth; + + const int32_t* p = plist; + + __m256i sum = _mm256_setzero_si256(); + for (int32_t i = 0; i < rounds; ++i) { + __m256i val = _mm256_lddqu_si256((__m256i*)p); + sum = _mm256_add_epi32(sum, val); + p += bitWidth; + } + + // let sum up the final results + const int64_t* q = (const int64_t*)&sum; + pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; + + int32_t startIndex = rounds * bitWidth; + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.isum += plist[j + startIndex]; + } +#endif +} + +static void i64VectorSumAVX2(const int64_t* plist, int32_t numOfRows, SAvgRes* pRes) { +#if __AVX2__ + // find the start position that are aligned to 32bytes address in memory + int32_t bitWidth = 4; + int32_t remainder = numOfRows % bitWidth; + int32_t rounds = numOfRows / bitWidth; + + const int64_t* p = plist; + + __m256i sum = _mm256_setzero_si256(); + + for (int32_t i = 0; i < rounds; ++i) { + __m256i val = _mm256_lddqu_si256((__m256i*)p); + sum = _mm256_add_epi64(sum, val); + p += bitWidth; + } + + // let sum up the final results + const int64_t* q = (const int64_t*)&sum; + pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; + + int32_t startIndex = rounds * bitWidth; + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.isum += plist[j + startIndex]; + } +#endif +} + static int32_t handleFloatCols(const SColumnInfoData* pCol, const SInputColumnInfoData* pInput, SAvgRes* pRes) { int32_t numOfElems = 0; float* plist = (float*)pCol->pData; @@ -105,7 +212,7 @@ static int32_t handleFloatCols(const SColumnInfoData* pCol, const SInputColumnIn // 3. 
If the CPU supports AVX, let's employ AVX instructions to speedup this loop if (tsAVXEnable && tsSIMDEnable) { - floatVectorSumAVX(pInput, plist, pRes); + floatVectorSumAVX(plist, pInput->numOfRows, pRes); } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { pRes->sum.dsum += plist[i]; @@ -133,8 +240,25 @@ bool avgFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResultInfo) { return true; } +static int32_t calculateAvgBySMAInfo(SAvgRes* pRes, int32_t numOfRows, int32_t type, const SColumnDataAgg* pAgg) { + int32_t numOfElem = numOfRows - pAgg->numOfNull; + ASSERT(numOfElem >= 0); + + pRes->count += numOfElem; + if (IS_SIGNED_NUMERIC_TYPE(type)) { + pRes->sum.isum += pAgg->sum; + } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { + pRes->sum.usum += pAgg->sum; + } else if (IS_FLOAT_TYPE(type)) { + pRes->sum.dsum += GET_DOUBLE_VAL((const char*)&(pAgg->sum)); + } + + return numOfElem; +} + int32_t avgFunction(SqlFunctionCtx* pCtx) { - int32_t numOfElem = 0; + int32_t numOfElem = 0; + const int32_t THRESHOLD_SIZE = 8; SInputColumnInfoData* pInput = &pCtx->input; SColumnDataAgg* pAgg = pInput->pColumnDataAgg[0]; @@ -154,19 +278,149 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { goto _avg_over; } - if (pInput->colDataAggIsSet) { - numOfElem = numOfRows - pAgg->numOfNull; - ASSERT(numOfElem >= 0); - - pAvgRes->count += numOfElem; - if (IS_SIGNED_NUMERIC_TYPE(type)) { - pAvgRes->sum.isum += pAgg->sum; - } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { - pAvgRes->sum.usum += pAgg->sum; - } else if (IS_FLOAT_TYPE(type)) { - pAvgRes->sum.dsum += GET_DOUBLE_VAL((const char*)&(pAgg->sum)); + if (pInput->colDataSMAIsSet) { // try to use SMA if available + numOfElem = calculateAvgBySMAInfo(pAvgRes, numOfRows, type, pAgg); + } else if (!pCol->hasNull) { // try to employ the simd instructions to speed up the loop + numOfElem = pInput->numOfRows; + pAvgRes->count += pInput->numOfRows; + + bool simdAvaiable = tsAVXEnable && 
tsSIMDEnable && (numOfRows > THRESHOLD_SIZE); + + switch(type) { + case TSDB_DATA_TYPE_TINYINT: { + const int8_t* plist = (const int8_t*) pCol->pData; + + // 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + i8VectorSumAVX2(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.isum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_SMALLINT: { + const double* plist = (const double*)pCol->pData; + + // 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + doubleVectorSumAVX(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.isum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_INT: { + const int32_t* plist = (const int32_t*) pCol->pData; + + // 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + i32VectorSumAVX2(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.isum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_BIGINT: { + const int64_t* plist = (const int64_t*) pCol->pData; + + // 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + i64VectorSumAVX2(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.isum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + const float* plist = (const float*) pCol->pData; + + // 1. 
If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + floatVectorSumAVX(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.dsum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + const double* plist = (const double*) pCol->pData; + + // 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + doubleVectorSumAVX(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.dsum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_UTINYINT: { + const double* plist = (const double*) pCol->pData; + + // 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + doubleVectorSumAVX(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.usum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_USMALLINT: { + const double* plist = (const double*) pCol->pData; + + // 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + doubleVectorSumAVX(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.usum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_UINT: { + const double* plist = (const double*) pCol->pData; + + // 1. 
If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + doubleVectorSumAVX(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.usum += plist[i]; + } + } + break; + } + case TSDB_DATA_TYPE_UBIGINT: { + const double* plist = (const double*) pCol->pData; + + // 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop + if (simdAvaiable) { + doubleVectorSumAVX(plist, numOfRows, pAvgRes); + } else { + for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { + pAvgRes->sum.usum += plist[i]; + } + } + break; + } + default: + ASSERT(0); } - } else { // computing based on the true data block + } else { switch (type) { case TSDB_DATA_TYPE_TINYINT: { int8_t* plist = (int8_t*)pCol->pData; diff --git a/source/libs/function/src/detail/tminmax.c b/source/libs/function/src/detail/tminmax.c index 074e5ef428044fdd9381c77a83acb036c6923216..d239315e0ec51ad0d967a9eff19bf5160e34e70f 100644 --- a/source/libs/function/src/detail/tminmax.c +++ b/source/libs/function/src/detail/tminmax.c @@ -36,7 +36,7 @@ static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool is if (!isMinFunc) { // max function for (int32_t i = 0; i < rounds; ++i) { - next = _mm256_loadu_si256((__m256i*)p); + next = _mm256_lddqu_si256((__m256i*)p); initialVal = _mm256_max_epi32(initialVal, next); p += bitWidth; } @@ -61,7 +61,7 @@ static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool is } } else { // min function for (int32_t i = 0; i < rounds; ++i) { - next = _mm256_loadu_si256((__m256i*)p); + next = _mm256_lddqu_si256((__m256i*)p); initialVal = _mm256_min_epi32(initialVal, next); p += bitWidth; } @@ -369,7 +369,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { } // data in current data block are qualified to the query - if (pInput->colDataAggIsSet) { + if 
(pInput->colDataSMAIsSet) { numOfElems = pInput->numOfRows - pAgg->numOfNull; ASSERT(pInput->numOfRows == pInput->totalRows && numOfElems >= 0); if (numOfElems == 0) {