未验证 提交 9355392c 编写于 作者: G Ganlin Zhao 提交者: GitHub

Merge pull request #14537 from taosdata/fix/TD-16877

fix(query): stddev function support unsigned data types
......@@ -90,12 +90,14 @@ typedef struct SStddevRes {
double result;
int64_t count;
union {
double quadraticDSum;
int64_t quadraticISum;
double quadraticDSum;
int64_t quadraticISum;
uint64_t quadraticUSum;
};
union {
double dsum;
int64_t isum;
double dsum;
int64_t isum;
uint64_t usum;
};
int16_t type;
} SStddevRes;
......@@ -1729,6 +1731,68 @@ int32_t stddevFunction(SqlFunctionCtx* pCtx) {
break;
}
case TSDB_DATA_TYPE_UTINYINT: {
uint8_t* plist = (uint8_t*)pCol->pData;
for (int32_t i = start; i < numOfRows + start; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->usum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
}
break;
}
case TSDB_DATA_TYPE_USMALLINT: {
uint16_t* plist = (uint16_t*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->usum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
}
break;
}
case TSDB_DATA_TYPE_UINT: {
uint32_t* plist = (uint32_t*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->usum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
}
break;
}
case TSDB_DATA_TYPE_UBIGINT: {
uint64_t* plist = (uint64_t*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->usum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
}
break;
}
case TSDB_DATA_TYPE_FLOAT: {
float* plist = (float*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
......@@ -1771,9 +1835,12 @@ _stddev_over:
static void stddevTransferInfo(SStddevRes* pInput, SStddevRes* pOutput) {
pOutput->type = pInput->type;
if (IS_INTEGER_TYPE(pOutput->type)) {
if (IS_SIGNED_NUMERIC_TYPE(pOutput->type)) {
pOutput->quadraticISum += pInput->quadraticISum;
pOutput->isum += pInput->isum;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pOutput->type)) {
pOutput->quadraticUSum += pInput->quadraticUSum;
pOutput->usum += pInput->usum;
} else {
pOutput->quadraticDSum += pInput->quadraticDSum;
pOutput->dsum += pInput->dsum;
......@@ -1848,6 +1915,22 @@ int32_t stddevInvertFunction(SqlFunctionCtx* pCtx) {
LIST_STDDEV_SUB_N(pStddevRes->isum, int64_t);
break;
}
case TSDB_DATA_TYPE_UTINYINT: {
LIST_STDDEV_SUB_N(pStddevRes->isum, uint8_t);
break;
}
case TSDB_DATA_TYPE_USMALLINT: {
LIST_STDDEV_SUB_N(pStddevRes->isum, uint16_t);
break;
}
case TSDB_DATA_TYPE_UINT: {
LIST_STDDEV_SUB_N(pStddevRes->isum, uint32_t);
break;
}
case TSDB_DATA_TYPE_UBIGINT: {
LIST_STDDEV_SUB_N(pStddevRes->isum, uint64_t);
break;
}
case TSDB_DATA_TYPE_FLOAT: {
LIST_STDDEV_SUB_N(pStddevRes->dsum, float);
break;
......@@ -1871,9 +1954,12 @@ int32_t stddevFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
int32_t type = pStddevRes->type;
double avg;
if (IS_INTEGER_TYPE(type)) {
if (IS_SIGNED_NUMERIC_TYPE(type)) {
avg = pStddevRes->isum / ((double)pStddevRes->count);
pStddevRes->result = sqrt(fabs(pStddevRes->quadraticISum / ((double)pStddevRes->count) - avg * avg));
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
avg = pStddevRes->usum / ((double)pStddevRes->count);
pStddevRes->result = sqrt(fabs(pStddevRes->quadraticUSum / ((double)pStddevRes->count) - avg * avg));
} else {
avg = pStddevRes->dsum / ((double)pStddevRes->count);
pStddevRes->result = sqrt(fabs(pStddevRes->quadraticDSum / ((double)pStddevRes->count) - avg * avg));
......@@ -1913,9 +1999,12 @@ int32_t stddevCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx) {
SResultRowEntryInfo* pSResInfo = GET_RES_INFO(pSourceCtx);
SStddevRes* pSBuf = GET_ROWCELL_INTERBUF(pSResInfo);
if (IS_INTEGER_TYPE(type)) {
if (IS_SIGNED_NUMERIC_TYPE(type)) {
pDBuf->isum += pSBuf->isum;
pDBuf->quadraticISum += pSBuf->quadraticISum;
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
pDBuf->usum += pSBuf->usum;
pDBuf->quadraticUSum += pSBuf->quadraticUSum;
} else {
pDBuf->dsum += pSBuf->dsum;
pDBuf->quadraticDSum += pSBuf->quadraticDSum;
......
......@@ -7,7 +7,7 @@ import platform
import math
class TDTestCase:
updatecfgDict = {'debugFlag': 143 ,"cDebugFlag":143,"uDebugFlag":143 ,"rpcDebugFlag":143 , "tmrDebugFlag":143 ,
updatecfgDict = {'debugFlag': 143 ,"cDebugFlag":143,"uDebugFlag":143 ,"rpcDebugFlag":143 , "tmrDebugFlag":143 ,
"jniDebugFlag":143 ,"simDebugFlag":143,"dDebugFlag":143, "dDebugFlag":143,"vDebugFlag":143,"mDebugFlag":143,"qDebugFlag":143,
"wDebugFlag":143,"sDebugFlag":143,"tsdbDebugFlag":143,"tqDebugFlag":143 ,"fsDebugFlag":143 ,"fnDebugFlag":143,
"maxTablesPerVnode":2 ,"minTablesPerVnode":2,"tableIncStepPerVnode":2 }
......@@ -24,7 +24,7 @@ class TDTestCase:
stddev_sql = f"select stddev({col_name}) from {tbname};"
same_sql = f"select {col_name} from {tbname} where {col_name} is not null "
tdSql.query(same_sql)
pre_data = np.array(tdSql.queryResult)[np.array(tdSql.queryResult) != None]
if (platform.system().lower() == 'windows' and pre_data.dtype == 'int32'):
......@@ -32,21 +32,21 @@ class TDTestCase:
pre_avg = np.sum(pre_data)/len(pre_data)
# Calculate variance
stddev_result = 0
stddev_result = 0
for num in tdSql.queryResult:
stddev_result += (num-pre_avg)*(num-pre_avg)/len(tdSql.queryResult)
stddev_result = math.sqrt(stddev_result)
tdSql.query(stddev_sql)
if -0.0001 < tdSql.queryResult[0][0]-stddev_result < 0.0001:
tdLog.info(" sql:%s; row:0 col:0 data:%d , expect:%d"%(stddev_sql,tdSql.queryResult[0][0],stddev_result))
else:
tdLog.exit(" sql:%s; row:0 col:0 data:%d , expect:%d"%(stddev_sql,tdSql.queryResult[0][0],stddev_result))
def prepare_datas_of_distribute(self):
# prepate datas for 20 tables distributed at different vgroups
tdSql.execute("create database if not exists testdb keep 3650 duration 1000 vgroups 5")
tdSql.execute(" use testdb ")
......@@ -117,17 +117,17 @@ class TDTestCase:
vgroups = tdSql.queryResult
vnode_tables={}
for vgroup_id in vgroups:
vnode_tables[vgroup_id[0]]=[]
# check sub_table of per vnode ,make sure sub_table has been distributed
tdSql.query("show tables like 'ct%'")
table_names = tdSql.queryResult
tablenames = []
for table_name in table_names:
vnode_tables[table_name[6]].append(table_name[0])
vnode_tables[table_name[6]].append(table_name[0])
self.vnode_disbutes = vnode_tables
count = 0
......@@ -138,14 +138,14 @@ class TDTestCase:
tdLog.exit(" the datas of all not satisfy sub_table has been distributed ")
def check_stddev_distribute_diff_vnode(self,col_name):
vgroup_ids = []
for k ,v in self.vnode_disbutes.items():
if len(v)>=2:
vgroup_ids.append(k)
distribute_tbnames = []
for vgroup_id in vgroup_ids:
vnode_tables = self.vnode_disbutes[vgroup_id]
distribute_tbnames.append(random.sample(vnode_tables,1)[0])
......@@ -154,7 +154,7 @@ class TDTestCase:
tbname_ins += "'%s' ,"%tbname
tbname_filters = tbname_ins[:-1]
stddev_sql = f"select stddev({col_name}) from stb1 where tbname in ({tbname_filters});"
same_sql = f"select {col_name} from stb1 where tbname in ({tbname_filters}) and {col_name} is not null "
......@@ -166,7 +166,7 @@ class TDTestCase:
pre_avg = np.sum(pre_data)/len(pre_data)
# Calculate variance
stddev_result = 0
stddev_result = 0
for num in tdSql.queryResult:
stddev_result += (num-pre_avg)*(num-pre_avg)/len(tdSql.queryResult)
......@@ -177,8 +177,8 @@ class TDTestCase:
def check_stddev_status(self):
# check max function work status
# check max function work status
tdSql.query("show tables like 'ct%'")
table_names = tdSql.queryResult
tablenames = []
......@@ -187,31 +187,31 @@ class TDTestCase:
tdSql.query("desc stb1")
col_names = tdSql.queryResult
colnames = []
for col_name in col_names:
if col_name[1] in ["INT" ,"BIGINT" ,"SMALLINT" ,"TINYINT" , "FLOAT" ,"DOUBLE"]:
colnames.append(col_name[0])
for tablename in tablenames:
for colname in colnames:
if colname.startswith("c"):
self.check_stddev_functions(tablename,colname)
else:
# self.check_stddev_functions(tablename,colname)
# self.check_stddev_functions(tablename,colname)
pass
# check max function for different vnode
# check max function for different vnode
for colname in colnames:
if colname.startswith("c"):
self.check_stddev_distribute_diff_vnode(colname)
else:
# self.check_stddev_distribute_diff_vnode(colname) # bug for tag
# self.check_stddev_distribute_diff_vnode(colname) # bug for tag
pass
def distribute_agg_query(self):
# basic filter
tdSql.query(" select stddev(c1) from stb1 ")
......@@ -235,7 +235,7 @@ class TDTestCase:
tdSql.query("select stddev(c1) from stb1 where t1> 4 partition by tbname")
tdSql.checkRows(15)
# union all
# union all
tdSql.query("select stddev(c1) from stb1 union all select stddev(c1) from stb1 ")
tdSql.checkRows(2)
tdSql.checkData(0,0,6.694663959)
......@@ -244,7 +244,7 @@ class TDTestCase:
tdSql.checkRows(1)
tdSql.checkData(0,0,0.000000000)
# join
# join
tdSql.execute(" create database if not exists db ")
tdSql.execute(" use db ")
......@@ -252,7 +252,7 @@ class TDTestCase:
tdSql.execute(" create table tb1 using st tags(1) ")
tdSql.execute(" create table tb2 using st tags(2) ")
for i in range(10):
ts = i*10 + self.ts
tdSql.execute(f" insert into tb1 values({ts},{i},{i}.0)")
......@@ -263,7 +263,7 @@ class TDTestCase:
tdSql.checkData(0,0,2.872281323)
tdSql.checkData(0,1,2.872281323)
# group by
# group by
tdSql.execute(" use testdb ")
# partition by tbname or partition by tag
......@@ -295,7 +295,7 @@ class TDTestCase:
self.check_stddev_status()
self.distribute_agg_query()
def stop(self):
tdSql.close()
tdLog.success("%s successfully executed" % __file__)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册