diff --git a/source/libs/function/inc/builtinsimpl.h b/source/libs/function/inc/builtinsimpl.h index 307a82e256f694416a3bc386e510613bf8bc313b..a7ac53fab61593240a0859d16b44f4dd27a49ddd 100644 --- a/source/libs/function/inc/builtinsimpl.h +++ b/source/libs/function/inc/builtinsimpl.h @@ -32,6 +32,7 @@ typedef struct SSumRes { int16_t type; int64_t prevTs; bool isPrevTsSet; + bool overflow; // if overflow is true, dsum to be used for any type; } SSumRes; typedef struct SMinmaxResInfo { diff --git a/source/libs/function/src/detail/tavgfunction.c b/source/libs/function/src/detail/tavgfunction.c index 60bf30d8edb524b1a2c17955bca830f1b4f34213..8e010181d17ea12fe08491498217d21c48e562bb 100644 --- a/source/libs/function/src/detail/tavgfunction.c +++ b/source/libs/function/src/detail/tavgfunction.c @@ -41,6 +41,57 @@ } \ } while (0) +// define signed number sum with check overflow +#define CHECK_OVERFLOW_SUM_SIGNED(out, val) \ + if (out->sum.overflow) { \ + out->sum.dsum += val; \ + } else if (out->sum.isum > 0 && val > 0 && INT64_MAX - out->sum.isum <= val || \ + out->sum.isum < 0 && val < 0 && INT64_MIN - out->sum.isum >= val) { \ + double dsum = (double)out->sum.isum; \ + out->sum.overflow = true; \ + out->sum.dsum = dsum + val; \ + } else { \ + out->sum.isum += val; \ + } + +// val is big than INT64_MAX, val come from merge +#define CHECK_OVERFLOW_SUM_SIGNED_BIG(out, val, big) \ + if (out->sum.overflow) { \ + out->sum.dsum += val; \ + } else if (out->sum.isum > 0 && val > 0 && INT64_MAX - out->sum.isum <= val || \ + out->sum.isum < 0 && val < 0 && INT64_MIN - out->sum.isum >= val || \ + big) { \ + double dsum = (double)out->sum.isum; \ + out->sum.overflow = true; \ + out->sum.dsum = dsum + val; \ + } else { \ + out->sum.isum += val; \ + } + +// define unsigned number sum with check overflow +#define CHECK_OVERFLOW_SUM_UNSIGNED(out, val) \ + if (out->sum.overflow) { \ + out->sum.dsum += val; \ + } else if (UINT64_MAX - out->sum.usum <= val) { \ + double dsum = (double)out->sum.usum; \ + out->sum.overflow = true; \ + out->sum.dsum = dsum + val; \ + } else { \ + out->sum.usum += val; \ + } + +// val is big than UINT64_MAX, val come from merge +#define CHECK_OVERFLOW_SUM_UNSIGNED_BIG(out, val, big) \ + if (out->sum.overflow) { \ + out->sum.dsum += val; \ + } else if (UINT64_MAX - out->sum.usum <= val || big) { \ + double dsum = (double)out->sum.usum; \ + out->sum.overflow = true; \ + out->sum.dsum = dsum + val; \ + } else { \ + out->sum.usum += val; \ + } + typedef struct SAvgRes { double result; SSumRes sum; @@ -319,9 +370,9 @@ static int32_t calculateAvgBySMAInfo(SAvgRes* pRes, int32_t numOfRows, int32_t t pRes->count += numOfElem; if (IS_SIGNED_NUMERIC_TYPE(type)) { - pRes->sum.isum += pAgg->sum; + CHECK_OVERFLOW_SUM_SIGNED(pRes, pAgg->sum); } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { - pRes->sum.usum += pAgg->sum; + CHECK_OVERFLOW_SUM_UNSIGNED(pRes, pAgg->sum); } else if (IS_FLOAT_TYPE(type)) { pRes->sum.dsum += GET_DOUBLE_VAL((const char*)&(pAgg->sum)); } @@ -344,7 +395,7 @@ static int32_t doAddNumericVector(SColumnInfoData* pCol, int32_t type, SInputCol numOfElems += 1; pRes->count += 1; - pRes->sum.isum += plist[i]; + CHECK_OVERFLOW_SUM_SIGNED(pRes, plist[i]) } break; @@ -359,7 +410,7 @@ static int32_t doAddNumericVector(SColumnInfoData* pCol, int32_t type, SInputCol numOfElems += 1; pRes->count += 1; - pRes->sum.isum += plist[i]; + CHECK_OVERFLOW_SUM_SIGNED(pRes, plist[i]) } break; } @@ -373,7 +424,7 @@ static int32_t doAddNumericVector(SColumnInfoData* pCol, int32_t type, SInputCol numOfElems += 1; pRes->count += 1; - pRes->sum.isum += plist[i]; + CHECK_OVERFLOW_SUM_SIGNED(pRes, plist[i]) } break; @@ -388,7 +439,7 @@ static int32_t doAddNumericVector(SColumnInfoData* pCol, int32_t type, SInputCol numOfElems += 1; pRes->count += 1; - pRes->sum.isum += plist[i]; + CHECK_OVERFLOW_SUM_SIGNED(pRes, plist[i]) } break; } @@ -402,7 +453,7 @@ static int32_t doAddNumericVector(SColumnInfoData* pCol, int32_t type, SInputCol numOfElems += 1; pRes->count += 1; - pRes->sum.usum += plist[i]; + CHECK_OVERFLOW_SUM_UNSIGNED(pRes, plist[i]) } break; @@ -417,7 +468,7 @@ static int32_t doAddNumericVector(SColumnInfoData* pCol, int32_t type, SInputCol numOfElems += 1; pRes->count += 1; - pRes->sum.usum += plist[i]; + CHECK_OVERFLOW_SUM_UNSIGNED(pRes, plist[i]) } break; } @@ -431,7 +482,7 @@ static int32_t doAddNumericVector(SColumnInfoData* pCol, int32_t type, SInputCol numOfElems += 1; pRes->count += 1; - pRes->sum.usum += plist[i]; + CHECK_OVERFLOW_SUM_UNSIGNED(pRes, plist[i]) } break; @@ -446,7 +497,8 @@ static int32_t doAddNumericVector(SColumnInfoData* pCol, int32_t type, SInputCol numOfElems += 1; pRes->count += 1; - pRes->sum.usum += plist[i]; + CHECK_OVERFLOW_SUM_UNSIGNED(pRes, plist[i]) + } break; } @@ -527,9 +579,9 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { if (type == TSDB_DATA_TYPE_TINYINT) { - pAvgRes->sum.isum += plist[i]; + CHECK_OVERFLOW_SUM_SIGNED(pAvgRes, plist[i]) } else { - pAvgRes->sum.usum += (uint8_t)plist[i]; + CHECK_OVERFLOW_SUM_UNSIGNED(pAvgRes, (uint8_t)plist[i]) } } } @@ -546,9 +598,9 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { if (type == TSDB_DATA_TYPE_SMALLINT) { - pAvgRes->sum.isum += plist[i]; + CHECK_OVERFLOW_SUM_SIGNED(pAvgRes, plist[i]) } else { - pAvgRes->sum.usum += (uint16_t)plist[i]; + CHECK_OVERFLOW_SUM_UNSIGNED(pAvgRes, (uint16_t)plist[i]) } } } @@ -565,9 +617,9 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { if (type == TSDB_DATA_TYPE_INT) { - pAvgRes->sum.isum += plist[i]; + CHECK_OVERFLOW_SUM_SIGNED(pAvgRes, plist[i]) } else { - pAvgRes->sum.usum += (uint32_t)plist[i]; + CHECK_OVERFLOW_SUM_UNSIGNED(pAvgRes, (uint32_t)plist[i]) } } } @@ -584,9 +636,9 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { if (type == TSDB_DATA_TYPE_BIGINT) { - pAvgRes->sum.isum += plist[i]; + CHECK_OVERFLOW_SUM_SIGNED(pAvgRes, plist[i]) } else { - pAvgRes->sum.usum += (uint64_t)plist[i]; + CHECK_OVERFLOW_SUM_UNSIGNED(pAvgRes, (uint64_t)plist[i]) } } } @@ -639,9 +691,11 @@ static void avgTransferInfo(SAvgRes* pInput, SAvgRes* pOutput) { pOutput->type = pInput->type; if (IS_SIGNED_NUMERIC_TYPE(pOutput->type)) { - pOutput->sum.isum += pInput->sum.isum; + bool overflow = pInput->sum.overflow; + CHECK_OVERFLOW_SUM_SIGNED_BIG(pOutput, (overflow ? pInput->sum.dsum : pInput->sum.isum), overflow); } else if (IS_UNSIGNED_NUMERIC_TYPE(pOutput->type)) { - pOutput->sum.usum += pInput->sum.usum; + bool overflow = pInput->sum.overflow; + CHECK_OVERFLOW_SUM_UNSIGNED_BIG(pOutput, (overflow ? pInput->sum.dsum : pInput->sum.usum), overflow); } else { pOutput->sum.dsum += pInput->sum.dsum; } @@ -741,9 +795,9 @@ int32_t avgCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx) { int16_t type = pDBuf->type == TSDB_DATA_TYPE_NULL ? pSBuf->type : pDBuf->type; if (IS_SIGNED_NUMERIC_TYPE(type)) { - pDBuf->sum.isum += pSBuf->sum.isum; + CHECK_OVERFLOW_SUM_SIGNED(pDBuf, pSBuf->sum.isum) } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { - pDBuf->sum.usum += pSBuf->sum.usum; + CHECK_OVERFLOW_SUM_UNSIGNED(pDBuf, pSBuf->sum.usum) } else { pDBuf->sum.dsum += pSBuf->sum.dsum; } @@ -759,7 +813,10 @@ int32_t avgFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { int32_t type = pRes->type; if (pRes->count > 0) { - if (IS_SIGNED_NUMERIC_TYPE(type)) { + if(pRes->sum.overflow) { + // overflow flag set , use dsum + pRes->result = pRes->sum.dsum / ((double)pRes->count); + }else if (IS_SIGNED_NUMERIC_TYPE(type)) { pRes->result = pRes->sum.isum / ((double)pRes->count); } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { pRes->result = pRes->sum.usum / ((double)pRes->count); diff --git a/tests/system-test/2-query/avg.py b/tests/system-test/2-query/avg.py index 1d4d9a2494da5bdbf9a4d1798b396f667989606a..dd8b38f9eaac4deba77ff30082b6d162c0d369c7 100644 --- a/tests/system-test/2-query/avg.py +++ b/tests/system-test/2-query/avg.py @@ -409,7 +409,7 @@ class TDTestCase: tdSql.query(f"select avg(c1) ,avg(c2) , avg(c3) , avg(c4), avg(c5), avg(c6) from {dbname}.sub1_bound ") tdSql.checkRows(1) tdSql.checkData(0,0,920350133.571428537) - tdSql.checkData(0,1,1.3176245766935393e+18) + tdSql.checkData(0,1,3.952873730080618e+18) tdSql.checkData(0,2,14042.142857143) tdSql.checkData(0,3,53.571428571) tdSql.checkData(0,4,5.828571332045761e+37) @@ -419,13 +419,56 @@ class TDTestCase: # check + - * / in functions tdSql.query(f" select avg(c1+1) ,avg(c2) , avg(c3*1) , avg(c4/2), avg(c5)/2, avg(c6) from {dbname}.sub1_bound ") tdSql.checkData(0,0,920350134.5714285) - tdSql.checkData(0,1,1.3176245766935393e+18) + tdSql.checkData(0,1,3.952873730080618e+18) tdSql.checkData(0,2,14042.142857143) tdSql.checkData(0,3,26.785714286) tdSql.checkData(0,4,2.9142856660228804e+37) tdSql.checkData(0,5,None) + # + # test bigint to check overflow + # + def avg_check_overflow(self): + # create db + tdSql.execute(f"drop database if exists db") + tdSql.execute(f"create database if not exists db") + time.sleep(3) + tdSql.execute(f"use db") + tdSql.execute(f"create table db.st(ts timestamp, ibv bigint, ubv bigint unsigned) tags(area int)") + # insert t1 data + tdSql.execute(f"insert into db.t1 using db.st tags(1) values(now,9223372036854775801,18446744073709551611)") + tdSql.execute(f"insert into db.t1 using db.st tags(1) values(now,8223372036854775801,17446744073709551611)") + tdSql.execute(f"insert into db.t1 using db.st tags(1) values(now,7223372036854775801,16446744073709551611)") + # insert t2 data + tdSql.execute(f"insert into db.t2 using db.st tags(2) values(now,9223372036854775801,18446744073709551611)") + tdSql.execute(f"insert into db.t2 using db.st tags(2) values(now,8223372036854775801,17446744073709551611)") + tdSql.execute(f"insert into db.t2 using db.st tags(2) values(now,7223372036854775801,16446744073709551611)") + + # check single table answer + tdSql.query(f"select avg(ibv), avg(ubv) from db.t1") + tdSql.checkRows(1) + tdSql.checkData(0, 0,8.223372036854776e+18) + tdSql.checkData(0, 1,1.744674407370955e+19) + + # check super table + tdSql.query(f"select avg(ibv), avg(ubv) from db.st") + tdSql.checkRows(1) + tdSql.checkData(0, 0,8.223372036854776e+18) + tdSql.checkData(0, 1,1.744674407370955e+19) + + # check child query + tdSql.query(f"select avg(ibv), avg(ubv) from (select * from db.st)") + tdSql.checkRows(1) + tdSql.checkData(0, 0,8.223372036854776e+18) + tdSql.checkData(0, 1,1.744674407370955e+19) + # check group by + tdSql.query(f"select avg(ibv), avg(ubv) from db.st group by tbname") + tdSql.checkRows(2) + tdSql.checkData(0, 0,8.223372036854776e+18) + tdSql.checkData(0, 1,1.744674407370955e+19) + tdSql.checkData(1, 0,8.223372036854776e+18) + tdSql.checkData(1, 1,1.744674407370955e+19) def run(self): # sourcery skip: extract-duplicate-method, remove-redundant-fstring tdSql.prepare() @@ -455,6 +498,8 @@ class TDTestCase: self.avg_func_filter() self.avg_check_unsigned() + # check avg overflow + self.avg_check_overflow() def stop(self): tdSql.close() tdLog.success(f"{__file__} successfully executed")