hyperloglog.py 16.9 KB
Newer Older
C
cpwu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
import datetime

from util.log import *
from util.sql import *
from util.cases import *
from util.dnodes import *

# Column names of the test tables. PRIMARY_COL is the timestamp column that
# every table starts with; the remaining names map one-to-one to the typed
# columns declared in the create-table statements of this file.
PRIMARY_COL = "ts"

INT_COL = "c1"
BINT_COL = "c2"
SINT_COL = "c3"
TINT_COL = "c4"
FLOAT_COL = "c5"
DOUBLE_COL = "c6"
BOOL_COL = "c7"

BINARY_COL = "c8"
NCHAR_COL = "c9"
TS_COL = "c10"

# Columns grouped by the kind of predicate that can be applied to them.
NUM_COL = [
    INT_COL,
    BINT_COL,
    SINT_COL,
    TINT_COL,
    FLOAT_COL,
    DOUBLE_COL,
]
CHAR_COL = [
    BINARY_COL,
    NCHAR_COL,
]
BOOLEAN_COL = [
    BOOL_COL,
]
TS_TYPE_COL = [
    TS_COL,
]

# Every non-primary column, in declaration order (c1 .. c10).
ALL_COL = [*NUM_COL, *BOOLEAN_COL, *CHAR_COL, *TS_TYPE_COL]
C
cpwu 已提交
28
# Database name passed to tdSql.prepare() in run() and used as the default
# `dbname` argument of the helper methods in this file.
DBNAME = "db"
C
cpwu 已提交
29 30

class TDTestCase:
G
Ganlin Zhao 已提交
31

“happyguoxy” 已提交
32
    # Per-vnode table settings for this case. Nothing in this file reads the
    # dict, so it is presumably consumed by the test framework — TODO confirm.
    updatecfgDict = {"maxTablesPerVnode":2 ,"minTablesPerVnode":2,"tableIncStepPerVnode":2 }
C
cpwu 已提交
33

34
    def init(self, conn, logSql, replicaVar=1):
        """Prepare the case for a run.

        Stores ``replicaVar`` on the instance as an int, logs which file is
        starting, and hands a cursor obtained from ``conn`` to ``tdSql``.
        ``logSql`` is accepted but not used by this method.
        """
        self.replicaVar = int(replicaVar)
        tdLog.debug(f"start to excute {__file__}")
        cursor = conn.cursor()
        tdSql.init(cursor)

    def __query_condition(self,tbname):
        """Return the name of every column in ALL_COL, in order, as select targets.

        ``tbname`` is accepted but not used.
        """
        return list(map(str, ALL_COL))
C
cpwu 已提交
41 42 43

    def __join_condition(self, tb_list, filter=PRIMARY_COL, INNER=False):
        """Build a FROM clause joining every table in ``tb_list`` to the first one.

        Args:
            tb_list: table names; the first entry is the reference table that
                every other table is joined against.
            filter: column compared for equality in each ``on`` condition.
            INNER: emit ``inner join`` instead of ``join`` when true.

        Returns:
            The reference table name followed by one
            ``" <join> <tb> on <ref>.<filter>=<tb>.<filter>"`` fragment per
            remaining table. With a single table, just that table name.

        Raises:
            IndexError: if ``tb_list`` is empty.
        """
        table_reference = tb_list[0]
        join = "inner join" if INNER else "join"
        # The clause must start from the reference table itself; starting from
        # any other (undefined) name raises NameError before a join is built.
        join_fragments = [
            f" {join} {tb} on {table_reference}.{filter}={tb}.{filter}"
            for tb in tb_list[1:]
        ]
        return table_reference + "".join(join_fragments)

    def __where_condition(self, col=None, tbname=None, query_conditon=None):
        """Return a WHERE clause for ``query_conditon`` or, failing that, for ``col``.

        Args:
            col: column name used to pick a type-specific predicate when no
                query condition is given.
            tbname: table name used to qualify ``col`` in that predicate.
            query_conditon: select expression to test with ``is not null``.
                A string starting with ``count``, ``max``, ``sum`` or ``min``
                is first unwrapped from ``func(expr)`` to ``expr``.

        Returns:
            The WHERE clause text (with a leading space), or ``""`` when
            neither argument selects a predicate.
        """
        if query_conditon and isinstance(query_conditon, str):
            # Drop "<func>(" from the front and ")" from the back.
            for func in ("count", "max", "sum", "min"):
                if query_conditon.startswith(func):
                    query_conditon = query_conditon[len(func) + 1:-1]
                    break

        if query_conditon:
            return f" where {query_conditon} is not null"
        if col in NUM_COL:
            return f" where abs( {tbname}.{col} ) >= 0"
        if col in CHAR_COL:
            return f" where lower( {tbname}.{col} ) like 'bina%' or lower( {tbname}.{col} ) like '_cha%' "
        if col in BOOLEAN_COL:
            return f" where {tbname}.{col} in (false, true)  "
        # PRIMARY_COL is a single column name (a str), so compare by equality.
        # A membership test would be a substring check that raises TypeError
        # for col=None and accepts "t", "s" and "".
        if col in TS_TYPE_COL or col == PRIMARY_COL:
            return f" where cast( {tbname}.{col} as binary(16) ) is not null "

        return ""

    def __group_condition(self, col, having = None):
        if isinstance(col, str):
            if col.startswith("count"):
                col = col[6:-1]
            elif col.startswith("max"):
                col = col[4:-1]
            elif col.startswith("sum"):
                col = col[4:-1]
            elif col.startswith("min"):
                col = col[4:-1]
        return f" group by {col} having {having}" if having else f" group by {col} "

    def __single_sql(self, select_clause, from_clause, where_condition="", group_condition=""):
C
cpwu 已提交
88
        if isinstance(select_clause, str) and "on" not in from_clause and select_clause.split(".")[0].split("(")[-1] != from_clause.split(".")[0]:
C
cpwu 已提交
89
            return
C
cpwu 已提交
90
        return f"select hyperloglog({select_clause}) from {from_clause} {where_condition} {group_condition}"
C
cpwu 已提交
91 92

    @property
    def __tb_list(self, dbname=DBNAME):
        """Database-qualified names of the tables the generated statements target.

        Accessed as a property, so ``dbname`` always takes its default value.
        """
        table_names = ("ct1", "ct4", "t1", "ct2", "stb1")
        return [f"{dbname}.{name}" for name in table_names]

    def sql_list(self):
        """Generate the hyperloglog statements to run against every listed table.

        For each table and each select target, four variants are composed:
        with WHERE and GROUP BY ... HAVING, bare, with WHERE only, and with
        GROUP BY only. Variants that the composer skipped (falsy results) are
        left out of the returned list.
        """
        statements = []
        for table in self.__tb_list:
            short_name = table.split(".")[-1]
            for target in self.__query_condition(short_name):
                grouped = self.__group_condition(col=target)
                filtered = self.__where_condition(query_conditon=target)
                grouped_having = self.__group_condition(col=target, having=f"{target} is not null")

                statements.append(self.__single_sql(target, table, filtered, grouped_having))
                statements.append(self.__single_sql(target, table,))
                statements.append(self.__single_sql(target, table, where_condition=filtered))
                statements.append(self.__single_sql(target, table, group_condition=grouped))

        return [statement for statement in statements if statement]


C
cpwu 已提交
125
    def hyperloglog_check(self):
        """Log and run every statement produced by sql_list() through tdSql.query."""
        statements = self.sql_list()
        tdLog.printNoPrefix("===step 1: curent case, must return query OK")
        for statement in statements:
            tdLog.info(f"sql: {statement}")
            tdSql.query(statement)

C
cpwu 已提交
132 133
    def __test_current(self, dbname=DBNAME):
        """Run the hyperloglog queries that must succeed and check their results.

        Checks row counts for plain and grouped queries, the value returned
        for a join of ct4 and ct2, and the per-group values on stb1, then runs
        the generated statements via hyperloglog_check(). Reads ``self.rows``.
        """
        def expect_rows(sql, row_count):
            # Run one query and verify how many rows came back.
            tdSql.query(sql)
            tdSql.checkRows(row_count)

        expect_rows(f"select hyperloglog(ts) from {dbname}.ct1", 1)
        expect_rows(f"select hyperloglog(c1) from {dbname}.ct2", 1)
        expect_rows(f"select hyperloglog(c1) from {dbname}.ct4 group by c1", self.rows + 3)
        expect_rows(f"select hyperloglog(c1) from {dbname}.ct4 group by c7", 3)

        expect_rows(f"select hyperloglog(ct2.c1) from {dbname}.ct4 ct4 join {dbname}.ct2 ct2 on ct4.ts=ct2.ts", 1)
        tdSql.checkData(0, 0, self.rows + 2)

        # Grouped by c1: a group whose c1 is NULL must report 0, any other group 1.
        tdSql.query(f"select hyperloglog(c1), c1 from {dbname}.stb1 group by c1")
        for row in range(tdSql.queryRows):
            group_value = tdSql.queryResult[row][1]
            expected = 0 if group_value is None else 1
            tdSql.checkData(row, 0, expected)

        self.hyperloglog_check()
C
cpwu 已提交
149

C
cpwu 已提交
150
    def __test_error(self, dbname=DBNAME):
        """Run the hyperloglog statements that tdSql.error() must see fail.

        Covers a call without arguments, calls with two arguments, an
        aggregate nested inside hyperloglog, and a statement that uses a
        bracketed list of expressions in place of a column.
        """
        tdLog.printNoPrefix("===step 0: err case, must return err")

        # Statements using hyperloglog(1) and hyperloglog(abs(c1)) were
        # commented out of this list and stay disabled.
        invalid_statements = (
            f"select hyperloglog() from {dbname}.ct1",
            f"select hyperloglog(c1, c2) from {dbname}.ct2",
            f"select hyperloglog(count(c1)) from {dbname}.t1",
            f"select hyperloglog({NUM_COL[0]}, {NUM_COL[1]}) from {dbname}.ct4",
            f'''select hyperloglog(['c1 + c1', 'c1 + c2', 'c1 + c3', 'c1 + c4', 'c1 + c5', 'c1 + c6', 'c1 + c7', 'c1 + c8', 'c1 + c9', 'c1 + c10'])
                    from {dbname}.ct1
                    where ['c1 + c1', 'c1 + c2', 'c1 + c3', 'c1 + c4', 'c1 + c5', 'c1 + c6', 'c1 + c7', 'c1 + c8', 'c1 + c9', 'c1 + c10'] is not null
                    group by ['c1 + c1', 'c1 + c2', 'c1 + c3', 'c1 + c4', 'c1 + c5', 'c1 + c6', 'c1 + c7', 'c1 + c8', 'c1 + c9', 'c1 + c10']
                    having ['c1 + c1', 'c1 + c2', 'c1 + c3', 'c1 + c4', 'c1 + c5', 'c1 + c6', 'c1 + c7', 'c1 + c8', 'c1 + c9', 'c1 + c10'] is not null''',
        )
        for statement in invalid_statements:
            tdSql.error(statement)
C
cpwu 已提交
165 166 167 168 169

    def all_test(self):
        """Run the must-fail statements first, then the must-succeed checks."""
        self.__test_error()
        self.__test_current()

C
cpwu 已提交
170
    def __create_tb(self, dbname=DBNAME):
        """Create the tables used by the case inside ``dbname``.

        Creates the super table ``stb1`` (tagged with ``t1 int``), the normal
        table ``t1`` with the same columns, and the child tables ``ct1`` ..
        ``ct4`` of ``stb1`` whose tag value equals their number.

        Args:
            dbname: database in which the tables are created.
        """
        tdLog.printNoPrefix("==========step1:create table")
        create_stb_sql  =  f'''create table {dbname}.stb1(
                ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint,
                 {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool,
                 {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp
            ) tags (t1 int)
            '''
        create_ntb_sql = f'''create table {dbname}.t1(
                ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint,
                 {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool,
                 {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp
            )
            '''
        tdSql.execute(create_stb_sql)
        tdSql.execute(create_ntb_sql)

        # The loop body only creates the child table. A leftover expression
        # statement that built a tuple of set literals and discarded it has
        # been removed; it had no effect.
        for table_no in range(1, 5):
            tdSql.execute(f'create table {dbname}.ct{table_no} using {dbname}.stb1 tags ( {table_no} )')
C
cpwu 已提交
191
    def __create_stable(
        self,
        dbname=DBNAME,
        stbname='stb',
        column_dict={
            'ts': 'timestamp', 'col1': 'tinyint', 'col2': 'smallint', 'col3': 'int',
            'col4': 'bigint', 'col5': 'tinyint unsigned', 'col6': 'smallint unsigned', 'col7': 'int unsigned',
            'col8': 'bigint unsigned', 'col9': 'float', 'col10': 'double', 'col11': 'bool',
            'col12': 'binary(20)', 'col13': 'nchar(20)',
        },
        tag_dict={
            'ts_tag': 'timestamp', 't1': 'tinyint', 't2': 'smallint', 't3': 'int',
            't4': 'bigint', 't5': 'tinyint unsigned', 't6': 'smallint unsigned', 't7': 'int unsigned',
            't8': 'bigint unsigned', 't9': 'float', 't10': 'double', 't11': 'bool',
            't12': 'binary(20)', 't13': 'nchar(20)',
        },
    ):
        """Create super table ``dbname.stbname`` if it does not exist yet.

        ``column_dict`` and ``tag_dict`` map names to SQL types; their entries
        are emitted in dict order as comma-separated ``name type`` pairs.
        Neither dict is modified.
        """
        column_sql = ",".join(f"{name} {sql_type}" for name, sql_type in column_dict.items())
        tag_sql = ",".join(f"{name} {sql_type}" for name, sql_type in tag_dict.items())
        tdSql.execute(f'create table if not exists {dbname}.{stbname} ({column_sql}) tags({tag_sql})')
C
cpwu 已提交
204

J
jiacy-jcy 已提交
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
    def __hyperloglog_check_distribute(self):
        """Check hyperloglog on a super table whose child tables span several vgroups.

        Creates database ``dbtest`` with 4 vgroups and 20 child tables, exits
        through tdLog.exit if any value in column 6 of the ins_tables result
        occurs fewer than twice, inserts 10 rows into ``stb_1``, queries
        hyperloglog on every column with and without GROUP BY, and finally
        drops the database.
        """
        dbname = "dbtest"
        stbname = "stb"
        childtable_num = 20
        vgroups_num = 4
        row_num = 10
        ts = 1537146000000
        binary_str = 'taosdata'
        nchar_str = '涛思数据'
        column_dict = {
            'ts':'timestamp',
            'col1': 'tinyint',
            'col2': 'smallint',
            'col3': 'int',
            'col4': 'bigint',
            'col5': 'tinyint unsigned',
            'col6': 'smallint unsigned',
            'col7': 'int unsigned',
            'col8': 'bigint unsigned',
            'col9': 'float',
            'col10': 'double',
            'col11': 'bool',
            'col12': 'binary(20)',
            'col13': 'nchar(20)'
        }
        tag_dict = {
            'loc':'nchar(20)'
        }

        tdSql.execute(f"create database if not exists {dbname} vgroups {vgroups_num}")
        tdSql.execute(f'use {dbname}')
        self.__create_stable(dbname, stbname,column_dict,tag_dict)
        for table_no in range(childtable_num):
            tdSql.execute(f"create table {dbname}.{stbname}_{table_no} using {dbname}.{stbname} tags('beijing')")

        # Column 6 of each ins_tables row is presumably the vgroup id — TODO confirm.
        tdSql.query(f"select * from information_schema.ins_tables where db_name = '{dbname}'")
        vgroup_ids = [table_row[6] for table_row in tdSql.queryResult]
        for vgroup_id in set(vgroup_ids):
            occurrences = vgroup_ids.count(vgroup_id)
            if occurrences < 2:
                tdLog.exit(f'This scene does not meet the requirements with {occurrences} vgroup!\n')
            else:
                tdLog.info(f'This scene with {occurrences} vgroups is ok!')

        insert_template = f"insert into {dbname}.stb_1 values(%d, %d, %d, %d, %d, %d, %d, %d, %d, %f, %f, %d, '{binary_str}%d', '{nchar_str}%d')"
        for i in range(row_num):
            values = (ts + i, *([i + 1] * 8), i + 0.1, i + 0.1, i % 2, i + 1, i + 1)
            tdSql.execute(insert_template % values)

        for column in column_dict:
            tdSql.query(f"select hyperloglog({column}) from {dbname}.{stbname}")
            tdSql.checkRows(1)
            tdSql.query(f"select hyperloglog({column}) from {dbname}.{stbname} group by {column}")

        tdSql.execute(f'drop database {dbname}')
C
cpwu 已提交
259

C
cpwu 已提交
260

C
cpwu 已提交
261
    def __insert_data(self, rows, dbname=DBNAME):
        """Fill ct1, ct4, ct2 and t1 in ``dbname`` with test rows.

        Timestamps are offsets, in milliseconds, from the current time. Each
        table receives ``rows`` generated rows (ct2 uses the negated
        counterparts of the ct4 expressions for its numeric columns), followed
        by one multi-row insert per table. For ct4, ct2 and t1 that insert
        holds three all-NULL rows and two rows with large-magnitude values.
        """
        now_time = int(datetime.datetime.now().timestamp() * 1000)

        # Generated rows for the three child tables, one of each per iteration.
        for i in range(rows):
            ct1_row = f"insert into {dbname}.ct1 values ( { now_time - i * 1000 }, {i}, {11111 * i}, {111 * i % 32767 }, {11 * i % 127}, {1.11*i}, {1100.0011*i}, {i%2}, 'binary{i}', 'nchar_测试_{i}', { now_time + 1 * i } )"
            ct4_row = f"insert into {dbname}.ct4 values ( { now_time - i * 7776000000 }, {i}, {11111 * i}, {111 * i % 32767 }, {11 * i % 127}, {1.11*i}, {1100.0011*i}, {i%2}, 'binary{i}', 'nchar_测试_{i}', { now_time + 1 * i } )"
            ct2_row = f"insert into {dbname}.ct2 values ( { now_time - i * 7776000000 }, {-i},  {-11111 * i}, {-111 * i % 32767 }, {-11 * i % 127}, {-1.11*i}, {-1100.0011*i}, {i%2}, 'binary{i}', 'nchar_测试_{i}', { now_time + 1 * i } )"
            for statement in (ct1_row, ct4_row, ct2_row):
                tdSql.execute(statement)

        # Two extra rows for ct1.
        ct1_extra = f'''insert into {dbname}.ct1 values
            ( { now_time - rows * 5 }, 0, 0, 0, 0, 0, 0, 0, 'binary0', 'nchar_测试_0', { now_time + 8 } )
            ( { now_time + 10000 }, { rows }, -99999, -999, -99, -9.99, -99.99, 1, 'binary9', 'nchar_测试_9', { now_time + 9 } )
            '''
        tdSql.execute(ct1_extra)

        # NULL rows plus upper-range values for ct4.
        ct4_extra = f'''insert into {dbname}.ct4 values
            ( { now_time - rows * 7776000000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            ( { now_time - rows * 3888000000 + 10800000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            ( { now_time +  7776000000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            (
                { now_time + 5184000000}, {pow(2,31)-pow(2,15)}, {pow(2,63)-pow(2,30)}, 32767, 127,
                { 3.3 * pow(10,38) }, { 1.3 * pow(10,308) }, { rows % 2 }, "binary_limit-1", "nchar_测试_limit-1", { now_time - 86400000}
                )
            (
                { now_time + 2592000000 }, {pow(2,31)-pow(2,16)}, {pow(2,63)-pow(2,31)}, 32766, 126,
                { 3.2 * pow(10,38) }, { 1.2 * pow(10,308) }, { (rows-1) % 2 }, "binary_limit-2", "nchar_测试_limit-2", { now_time - 172800000}
                )
            '''
        tdSql.execute(ct4_extra)

        # NULL rows plus lower-range values for ct2.
        ct2_extra = f'''insert into {dbname}.ct2 values
            ( { now_time - rows * 7776000000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            ( { now_time - rows * 3888000000 + 10800000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            ( { now_time + 7776000000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            (
                { now_time + 5184000000 }, { -1 * pow(2,31) + pow(2,15) }, { -1 * pow(2,63) + pow(2,30) }, -32766, -126,
                { -1 * 3.2 * pow(10,38) }, { -1.2 * pow(10,308) }, { rows % 2 }, "binary_limit-1", "nchar_测试_limit-1", { now_time - 86400000 }
                )
            (
                { now_time + 2592000000 }, { -1 * pow(2,31) + pow(2,16) }, { -1 * pow(2,63) + pow(2,31) }, -32767, -127,
                { - 3.3 * pow(10,38) }, { -1.3 * pow(10,308) }, { (rows-1) % 2 }, "binary_limit-2", "nchar_测试_limit-2", { now_time - 172800000 }
                )
            '''
        tdSql.execute(ct2_extra)

        # Generated rows for the normal table t1.
        for i in range(rows):
            insert_data = f'''insert into {dbname}.t1 values
                ( { now_time - i * 3600000 }, {i}, {i * 11111}, { i % 32767 }, { i % 127}, { i * 1.11111 }, { i * 1000.1111 }, { i % 2},
                "binary_{i}", "nchar_测试_{i}", { now_time - 1000 * i } )
                '''
            tdSql.execute(insert_data)

        # NULL rows plus upper-range values for t1.
        t1_extra = f'''insert into {dbname}.t1 values
            ( { now_time + 10800000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            ( { now_time - (( rows // 2 ) * 60 + 30) * 60000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            ( { now_time - rows * 3600000 }, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
            ( { now_time + 7200000 }, { pow(2,31) - pow(2,15) }, { pow(2,63) - pow(2,30) }, 32767, 127,
                { 3.3 * pow(10,38) }, { 1.3 * pow(10,308) }, { rows % 2 },
                "binary_limit-1", "nchar_测试_limit-1", { now_time - 86400000 }
                )
            (
                { now_time + 3600000 } , { pow(2,31) - pow(2,16) }, { pow(2,63) - pow(2,31) }, 32766, 126,
                { 3.2 * pow(10,38) }, { 1.2 * pow(10,308) }, { (rows-1) % 2 },
                "binary_limit-2", "nchar_测试_limit-2", { now_time - 172800000 }
                )
            '''
        tdSql.execute(t1_extra)


    def run(self):
        """Execute the whole case.

        Prepares database DBNAME, creates the tables, inserts ``self.rows``
        (10) generated rows per table, runs all checks, flushes the database,
        runs all checks again, and finally runs the distributed-scene check.
        """
        tdSql.prepare(dbname=DBNAME)

        tdLog.printNoPrefix("==========step1:create table")
        self.__create_tb(dbname=DBNAME)

        tdLog.printNoPrefix("==========step2:insert data")
        self.rows = 10
        self.__insert_data(self.rows,dbname=DBNAME)

        tdLog.printNoPrefix("==========step3:all check")
        self.all_test()

        # Flush the database that was prepared above; using DBNAME keeps this
        # statement in step with every other statement of the case.
        tdSql.execute(f"flush database {DBNAME}")

        tdLog.printNoPrefix("==========step4:after wal, all check again ")
        self.all_test()

        tdLog.printNoPrefix("==========step5: distribute scene check")
        self.__hyperloglog_check_distribute()


C
cpwu 已提交
358 359 360 361 362 363
    def stop(self):
        """Call tdSql.close() and log a success message naming this file."""
        tdSql.close()
        tdLog.success(f"{__file__} successfully executed")

# Register this file with tdCases for both platforms; each registration
# receives its own TDTestCase instance.
tdCases.addLinux(__file__, TDTestCase())
tdCases.addWindows(__file__, TDTestCase())