crash_gen.py 62.3 KB
Newer Older
1
#!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
14 15
from __future__ import annotations  # For type hinting before definition, ref: https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel    

S
Steven Li 已提交
16
import sys
17
import os
18
import traceback
19 20 21 22
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
23
import getopt
24
import argparse
25
import copy
S
Steven Li 已提交
26 27 28

import threading
import random
29
import time
S
Steven Li 已提交
30
import logging
31
import datetime
32
import textwrap
S
Steven Li 已提交
33

34
from typing import List
35
from typing import Dict
36
from typing import Set
37

S
Steven Li 已提交
38 39 40 41 42
from util.log import *
from util.dnodes import *
from util.cases import *
from util.sql import *

43
import crash_gen
S
Steven Li 已提交
44 45
import taos

46
# Global variables, tried to keep a small number. 
47 48 49 50

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
gConfig = argparse.Namespace() # Dummy value, will be replaced later
51
logger = None
S
Steven Li 已提交
52

53 54
def runThread(wt: WorkerThread):    
    wt.run()
55

56 57 58 59 60 61 62 63
class CrashGenError(Exception):
    def __init__(self, msg=None, errno=None):
        self.msg = msg    
        self.errno = errno
    
    def __str__(self):
        return self.msg

S
Steven Li 已提交
64
class WorkerThread:
65
    def __init__(self, pool: ThreadPool, tid, 
66 67 68 69
            tc: ThreadCoordinator,
            # te: TaskExecutor,
            ): # note: main thread context!
        # self._curStep = -1 
70
        self._pool = pool
71
        self._tid = tid        
72
        self._tc = tc # type: ThreadCoordinator
S
Steven Li 已提交
73
        # self.threadIdent = threading.get_ident()
74 75
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()
S
Steven Li 已提交
76

77
        # Let us have a DB connection of our own
78 79 80
        if ( gConfig.per_thread_db_connection ): # type: ignore
            self._dbConn = DbConn()   

81 82
        self._dbInUse = False # if "use db" was executed already

83
    def logDebug(self, msg):
S
Steven Li 已提交
84
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
85 86

    def logInfo(self, msg):
S
Steven Li 已提交
87
        logger.info("    TRD[{}] {}".format(self._tid, msg))
88

89 90 91 92 93 94 95 96
    def dbInUse(self):
        return self._dbInUse

    def useDb(self):
        if ( not self._dbInUse ):
            self.execSql("use db")
        self._dbInUse = True

97 98
    def getTaskExecutor(self):
        return self._tc.getTaskExecutor()     
99

S
Steven Li 已提交
100
    def start(self):
101
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
102

103
    def run(self): 
S
Steven Li 已提交
104
        # initialization after thread starts, in the thread context
105
        # self.isSleeping = False
106 107
        logger.info("Starting to run thread: {}".format(self._tid))

108
        if ( gConfig.per_thread_db_connection ): # type: ignore
109
            logger.debug("Worker thread openning database connection")
110
            self._dbConn.open()
S
Steven Li 已提交
111

112 113
        self._doTaskLoop()       
        
114
        # clean up
115
        if ( gConfig.per_thread_db_connection ): # type: ignore 
116
            self._dbConn.close()
117

118 119 120
    def _doTaskLoop(self) :
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
121 122 123
        while True:  
            tc = self._tc # Thread Coordinator, the overall master            
            tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
S
Steven Li 已提交
124
            logger.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid))
125
            self.crossStepGate()   # then per-thread gate, after being tapped
S
Steven Li 已提交
126
            logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
127
            if not self._tc.isRunning():
S
Steven Li 已提交
128
                logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
129 130
                break

131
            # Fetch a task from the Thread Coordinator
S
Steven Li 已提交
132
            logger.debug("[TRD] Worker thread [{}] about to fetch task".format(self._tid))
133
            task = tc.fetchTask()
134 135

            # Execute such a task
S
Steven Li 已提交
136
            logger.debug("[TRD] Worker thread [{}] about to execute task: {}".format(self._tid, task.__class__.__name__))
137
            task.execute(self)
138
            tc.saveExecutedTask(task)
S
Steven Li 已提交
139
            logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))
140 141

            self._dbInUse = False # there may be changes between steps
142
  
S
Steven Li 已提交
143
    def verifyThreadSelf(self): # ensure we are called by this own thread
144
        if ( threading.get_ident() != self._thread.ident ): 
S
Steven Li 已提交
145 146 147 148 149 150 151
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadMain(self): # ensure we are called by the main thread
        if ( threading.get_ident() != threading.main_thread().ident ): 
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
152
        if ( not self._thread.is_alive() ):
S
Steven Li 已提交
153 154
            raise RuntimeError("Unexpected dead thread")

155
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
156 157 158 159
    def crossStepGate(self):
        self.verifyThreadAlive()
        self.verifyThreadSelf() # only allowed by ourselves
        
160
        # Wait again at the "gate", waiting to be "tapped"
S
Steven Li 已提交
161
        logger.debug("[TRD] Worker thread {} about to cross the step gate".format(self._tid))
162 163
        self._stepGate.wait() 
        self._stepGate.clear()
S
Steven Li 已提交
164
        
165
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
166 167 168 169

    def tapStepGate(self): # give it a tap, release the thread waiting there
        self.verifyThreadAlive()
        self.verifyThreadMain() # only allowed for main thread
170
 
S
Steven Li 已提交
171
        logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
172 173
        self._stepGate.set() # wake up!        
        time.sleep(0) # let the released thread run a bit
174

175
    def execSql(self, sql): # TODO: expose DbConn directly
176 177 178
        if ( gConfig.per_thread_db_connection ):
            return self._dbConn.execute(sql)            
        else:
179
            return self._tc.getDbManager().getDbConn().execute(sql)
180

181 182 183 184 185 186 187 188 189 190 191 192
    def querySql(self, sql): # TODO: expose DbConn directly
        if ( gConfig.per_thread_db_connection ):
            return self._dbConn.query(sql)            
        else:
            return self._tc.getDbManager().getDbConn().query(sql)

    def getQueryResult(self):
        if ( gConfig.per_thread_db_connection ):
            return self._dbConn.getQueryResult()       
        else:
            return self._tc.getDbManager().getDbConn().getQueryResult()

193 194 195 196
    def getDbConn(self):
        if ( gConfig.per_thread_db_connection ):
            return self._dbConn     
        else:
197
            return self._tc.getDbManager().getDbConn()
198

199 200 201 202 203
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
    #         return self._dbConn.query(sql)            
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
204

205
class ThreadCoordinator:
206
    def __init__(self, pool: ThreadPool, dbManager):
207 208
        self._curStep = -1 # first step is 0
        self._pool = pool
209
        # self._wd = wd
210
        self._te = None # prepare for every new step
211
        self._dbManager = dbManager
212 213
        self._executedTasks: List[Task] = [] # in a given step
        self._lock = threading.RLock() # sync access for a few things
S
Steven Li 已提交
214

215
        self._stepBarrier = threading.Barrier(self._pool.numThreads + 1) # one barrier for all threads
216
        self._execStats = ExecutionStats()
S
Steven Li 已提交
217

218 219 220
    def getTaskExecutor(self):
        return self._te

221 222
    def getDbManager(self) -> DbManager :
        return self._dbManager
223

224 225 226
    def crossStepBarrier(self):
        self._stepBarrier.wait()

227 228
    def run(self):              
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
229 230

        # Coordinate all threads step by step
231 232
        self._curStep = -1 # not started yet
        maxSteps = gConfig.max_steps # type: ignore
233 234 235
        self._execStats.startExec() # start the stop watch
        failed = False
        while(self._curStep < maxSteps-1 and not failed):  # maxStep==10, last curStep should be 9
S
Steven Li 已提交
236 237 238
            if not gConfig.debug: 
                print(".", end="", flush=True) # print this only if we are not in debug mode
            logger.debug("[TRD] Main thread going to sleep")
239

240
            # Now ready to enter a step
241 242 243 244
            self.crossStepBarrier() # let other threads go past the pool barrier, but wait at the thread gate
            self._stepBarrier.reset() # Other worker threads should now be at the "gate"            

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
245
            try:
246 247 248 249
                sm = self._dbManager.getStateMachine()
                logger.debug("[STT] starting transitions")
                sm.transition(self._executedTasks) # at end of step, transiton the DB state
                logger.debug("[STT] transition ended")
250
                # Due to limitation (or maybe not) of the Python library, we cannot share connections across threads
251 252 253 254 255 256
                if sm.hasDatabase() :
                    for t in self._pool.threadList:
                        logger.debug("[DB] use db for all worker threads")
                        t.useDb()
                        # t.execSql("use db") # main thread executing "use db" on behalf of every worker thread

257 258 259 260 261 262 263 264 265 266 267 268 269
            except taos.error.ProgrammingError as err:
                if ( err.msg == 'network unavailable' ): # broken DB connection
                    logger.info("DB connection broken, execution failed")
                    traceback.print_stack()
                    failed = True
                    self._te = None # Not running any more
                    self._execStats.registerFailure("Broken DB Connection")
                    # continue # don't do that, need to tap all threads at end, and maybe signal them to stop
                else:
                    raise 
            finally:
                pass
            
270
            self.resetExecutedTasks() # clear the tasks after we are done
271 272

            # Get ready for next step
S
Steven Li 已提交
273
            logger.debug("<-- Step {} finished".format(self._curStep))
274
            self._curStep += 1 # we are about to get into next step. TODO: race condition here!                
275
            logger.debug("\r\n\n--> Step {} starts with main thread waking up".format(self._curStep)) # Now not all threads had time to go to sleep
276

277
            # A new TE for the new step
278 279
            if not failed: # only if not failed
                self._te = TaskExecutor(self._curStep)
280

S
Steven Li 已提交
281
            logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(self._curStep)) # Now not all threads had time to go to sleep            
S
Steven Li 已提交
282 283
            self.tapAllThreads()

284
        logger.debug("Main thread ready to finish up...")
285 286 287 288 289 290 291 292
        if not failed: # only in regular situations
            self.crossStepBarrier() # Cross it one last time, after all threads finish
            self._stepBarrier.reset()
            logger.debug("Main thread in exclusive zone...")
            self._te = None # No more executor, time to end
            logger.debug("Main thread tapping all threads one last time...")
            self.tapAllThreads() # Let the threads run one last time

293 294
        logger.debug("Main thread joining all threads")
        self._pool.joinAll() # Get all threads to finish
295
        logger.info("\nAll worker threads finished")
296 297 298
        self._execStats.endExec()

    def logStats(self):
299
        self._execStats.logStats()
S
Steven Li 已提交
300 301 302

    def tapAllThreads(self): # in a deterministic manner
        wakeSeq = []
303
        for i in range(self._pool.numThreads): # generate a random sequence
S
Steven Li 已提交
304 305 306 307
            if Dice.throw(2) == 1 :
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
308
        logger.debug("[TRD] Main thread waking up worker threads: {}".format(str(wakeSeq)))
309
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
310
        for i in wakeSeq:
311
            self._pool.threadList[i].tapStepGate() # TODO: maybe a bit too deep?!
S
Steven Li 已提交
312 313
            time.sleep(0) # yield

314 315 316 317 318 319
    def isRunning(self):
        return self._te != None

    def fetchTask(self) -> Task :
        if ( not self.isRunning() ): # no task
            raise RuntimeError("Cannot fetch task when not running")
320 321
        # return self._wd.pickTask()
        # Alternatively, let's ask the DbState for the appropriate task
322 323 324 325 326 327 328
        # dbState = self.getDbState()
        # tasks = dbState.getTasksAtState() # TODO: create every time?
        # nTasks = len(tasks)
        # i = Dice.throw(nTasks)
        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
        # return tasks[i].clone() # TODO: still necessary?
329
        taskType = self.getDbManager().getStateMachine().pickTaskType() # pick a task type for current state
330
        return taskType(self.getDbManager(), self._execStats) # create a task from it
331 332 333

    def resetExecutedTasks(self):
        self._executedTasks = [] # should be under single thread
334 335 336 337

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
338 339

# We define a class to run a number of threads in locking steps.
340
class ThreadPool:
341
    def __init__(self, numThreads, maxSteps):
342 343 344 345
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
346
        self.threadList = [] # type: List[WorkerThread]
347 348
        
    # starting to run all the threads, in locking steps
349
    def createAndStartThreads(self, tc: ThreadCoordinator):
350
        for tid in range(0, self.numThreads): # Create the threads
351
            workerThread = WorkerThread(self, tid, tc)            
352 353 354 355 356 357 358 359
            self.threadList.append(workerThread)
            workerThread.start() # start, but should block immediately before step 0

    def joinAll(self):
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

360 361
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Steven Li 已提交
362 363
class LinearQueue():
    def __init__(self):
364
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
365
        self.lastIndex = 0
366
        self._lock = threading.RLock() # our functions may call each other
367
        self.inUse = set() # the indexes that are in use right now
S
Steven Li 已提交
368

369 370 371 372 373 374 375 376 377
    def toText(self):
        return "[{}..{}], in use: {}".format(self.firstIndex, self.lastIndex, self.inUse)

    # Push (add new element, largest) to the tail, and mark it in use
    def push(self): 
        with self._lock:
            # if ( self.isEmpty() ): 
            #     self.lastIndex = self.firstIndex 
            #     return self.firstIndex
378 379
            # Otherwise we have something
            self.lastIndex += 1
380 381
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
382
            return self.lastIndex
S
Steven Li 已提交
383 384

    def pop(self):
385
        with self._lock:
386
            if ( self.isEmpty() ): 
387 388 389
                # raise RuntimeError("Cannot pop an empty queue") 
                return False # TODO: None?
            
390
            index = self.firstIndex
391
            if ( index in self.inUse ):
392 393
                return False

394 395 396 397 398 399 400
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
401
        with self._lock:
402 403 404 405
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
406
    def allocate(self, i):
407
        with self._lock:
408
            # logger.debug("LQ allocating item {}".format(i))
409 410 411 412
            if ( i in self.inUse ):
                raise RuntimeError("Cannot re-use same index in queue: {}".format(i))
            self.inUse.add(i)

S
Steven Li 已提交
413
    def release(self, i):
414
        with self._lock:
415 416
            # logger.debug("LQ releasing item {}".format(i))
            self.inUse.remove(i) # KeyError possible, TODO: why?
417 418 419 420

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
421
    def pickAndAllocate(self):
422 423 424
        if ( self.isEmpty() ):
            return None
        with self._lock:
425 426 427 428
            cnt = 0 # counting the interations
            while True:
                cnt += 1
                if ( cnt > self.size()*10 ): # 10x iteration already
429 430
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
431 432
                ret = Dice.throwRange(self.firstIndex, self.lastIndex+1)
                if ( not ret in self.inUse ):
433 434 435 436 437
                    self.allocate(ret)
                    return ret

class DbConn:
    def __init__(self):
438 439
        self._conn = None 
        self._cursor = None
440 441 442 443 444 445 446
        self.isOpen = False
        
    def open(self): # Open connection
        if ( self.isOpen ):
            raise RuntimeError("Cannot re-open an existing DB connection")

        cfgPath = "../../build/test/cfg" 
447 448
        self._conn = taos.connect(host="127.0.0.1", config=cfgPath) # TODO: make configurable
        self._cursor = self._conn.cursor()
449

450 451
        # Get the connection/cursor ready
        self._cursor.execute('reset query cache')
452
        # self._cursor.execute('use db') # do this at the beginning of every step
453 454

        # Open connection
455
        self._tdSql = TDSql()
456
        self._tdSql.init(self._cursor)
457
        logger.debug("[DB] data connection opened")
458 459 460 461 462
        self.isOpen = True

    def resetDb(self): # reset the whole database, etc.
        if ( not self.isOpen ):
            raise RuntimeError("Cannot reset database until connection is open")
463 464 465
        # self._tdSql.prepare() # Recreate database, etc.

        self._cursor.execute('drop database if exists db')
466 467
        logger.debug("Resetting DB, dropped database")
        # self._cursor.execute('create database db')
468 469
        # self._cursor.execute('use db')

470 471 472 473 474 475
        # tdSql.execute('show databases')

    def close(self):
        if ( not self.isOpen ):
            raise RuntimeError("Cannot clean up database until connection is open")
        self._tdSql.close()
476
        logger.debug("[DB] Database connection closed")
477
        self.isOpen = False
S
Steven Li 已提交
478

479
    def execute(self, sql): 
480
        if ( not self.isOpen ):
481
            raise RuntimeError("Cannot execute database commands until connection is open")
482 483 484 485
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
        logger.debug("[SQL] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
        return nRows
S
Steven Li 已提交
486

487
    def query(self, sql) :  # return rows affected
488 489
        if ( not self.isOpen ):
            raise RuntimeError("Cannot query database until connection is open")
490 491
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
492
        logger.debug("[SQL] Query Result, nRows = {}, SQL = {}".format(nRows, sql))
493
        return nRows
494
        # results are in: return self._tdSql.queryResult
495

496 497 498
    def getQueryResult(self):
        return self._tdSql.queryResult

499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
    def _queryAny(self, sql) : # actual query result as an int
        if ( not self.isOpen ):
            raise RuntimeError("Cannot query database until connection is open")
        tSql = self._tdSql
        nRows = tSql.query(sql)
        if nRows != 1 :
            raise RuntimeError("Unexpected result for query: {}, rows = {}".format(sql, nRows))
        if tSql.queryRows != 1 or tSql.queryCols != 1:
            raise RuntimeError("Unexpected result set for query: {}".format(sql))
        return tSql.queryResult[0][0]

    def queryScalar(self, sql) -> int :
        return self._queryAny(sql)

    def queryString(self, sql) -> str :
        return self._queryAny(sql)
    
class AnyState:
517
    STATE_INVALID    = -1
518 519 520 521
    STATE_EMPTY      = 0  # nothing there, no even a DB
    STATE_DB_ONLY    = 1  # we have a DB, but nothing else
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
    STATE_HAS_DATA   = 3  # we have some data in the table
522 523 524 525 526
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
527 528
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
529 530 531 532 533 534 535
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Steven Li 已提交
536
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1] # -1 hack to accomodate the STATE_INVALID case
537 538 539 540

    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
541 542 543 544 545 546 547 548
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
            raise RuntimeError("Unexpected comparison, type = {}".format(type(other)))

549 550 551
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
552 553 554
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

555 556 557 558 559 560
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]
561 562 563 564
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
565 566 567 568 569 570 571 572 573 574 575
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
        for task in tasks :
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
576
                # task.logDebug("Task success found")
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
                sCnt += 1
                if ( sCnt >= 2 ):
                    raise RuntimeError("Unexpected more than 1 success with task: {}".format(cls))

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
        for task in tasks :
            if not isinstance(task, cls):
                continue
            exists = True # we have a valid instance
            if task.isSuccess():
                sCnt += 1
        if ( exists and sCnt <= 0 ):
            raise RuntimeError("Unexpected zero success for task: {}".format(cls))

    def assertNoTask(self, tasks, cls):
        for task in tasks :
            if isinstance(task, cls):
                raise CrashGenError("This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))

    def assertNoSuccess(self, tasks, cls):
        for task in tasks :
            if isinstance(task, cls):
                if task.isSuccess():
                    raise RuntimeError("Unexpected successful task: {}".format(cls))

    def hasSuccess(self, tasks, cls):
        for task in tasks :
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
612 613 614 615 616 617
    def hasTask(self, tasks, cls):
        for task in tasks :
            if isinstance(task, cls):
                return True
        return False

618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
            False, False, # can create/drop Db
            False, False, # can create/drop fixed table
            False, False, # can insert/read data with fixed table
        ]

    # def verifyTasksToState(self, tasks, newState):

class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
            True, False, # can create/drop Db
            False, False, # can create/drop fixed table
            False, False, # can insert/read data with fixed table
        ]

S
Steven Li 已提交
638
    def verifyTasksToState(self, tasks, newState): 
639 640 641
        if ( self.hasSuccess(tasks, TaskCreateDb) ): # at EMPTY, if there's succes in creating DB
            if ( not self.hasTask(tasks, TaskDropDb) ) : # and no drop_db tasks
                self.assertAtMostOneSuccess(tasks, TaskCreateDb) # we must have at most one. TODO: compare numbers
642 643 644 645 646 647 648 649 650 651 652

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
653 654 655
        if ( not self.hasTask(tasks, TaskCreateDb) ):
            self.assertAtMostOneSuccess(tasks, TaskDropDb) # only if we don't create any more
        self.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Steven Li 已提交
656
        # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not true in massively parrallel cases
657
        # Nothing to be said about adding data task
658
        # if ( self.hasSuccess(tasks, DropDbTask) ): # dropped the DB
659
            # self.assertHasTask(tasks, DropDbTask) # implied by hasSuccess
660
            # self.assertAtMostOneSuccess(tasks, DropDbTask)
661
            # self._state = self.STATE_EMPTY
662 663 664 665
        # if ( self.hasSuccess(tasks, TaskCreateSuperTable) ): # did not drop db, create table success
        #     # self.assertHasTask(tasks, CreateFixedTableTask) # tried to create table
        #     if ( not self.hasTask(tasks, TaskDropSuperTable) ): 
        #         self.assertAtMostOneSuccess(tasks, TaskCreateSuperTable) # at most 1 attempt is successful, if we don't drop anything
666
            # self.assertNoTask(tasks, DropDbTask) # should have have tried
667 668 669 670 671 672 673 674 675 676 677 678
            # if ( not self.hasSuccess(tasks, AddFixedDataTask) ): # just created table, no data yet
            #     # can't say there's add-data attempts, since they may all fail
            #     self._state = self.STATE_TABLE_ONLY
            # else:                    
            #     self._state = self.STATE_HAS_DATA
        # What about AddFixedData?
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ):
        #     self._state = self.STATE_HAS_DATA
        # else: # no success in dropping db tasks, no success in create fixed table? read data should also fail
        #     # raise RuntimeError("Unexpected no-success scenario")   # We might just landed all failure tasks, 
        #     self._state = self.STATE_DB_ONLY  # no change

679
class StateSuperTableOnly(AnyState):
680 681 682 683 684 685 686 687 688
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
689
        if ( self.hasSuccess(tasks, TaskDropSuperTable) ): # we are able to drop the table
690 691 692
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
            self.hasSuccess(tasks, TaskCreateSuperTable) # we must have had recreted it

693
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
694 695
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
696
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
697 698 699
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
700
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
701 702 703
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
704 705 706 707 708 709 710 711 712 713 714

class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Steven Li 已提交
715
        if ( newState.equals(AnyState.STATE_EMPTY) ):
716 717 718
            self.hasSuccess(tasks, TaskDropDb)
            if ( not self.hasTask(tasks, TaskCreateDb) ) : 
                self.assertAtMostOneSuccess(tasks, TaskDropDb) # TODO: dicy
S
Steven Li 已提交
719
        elif ( newState.equals(AnyState.STATE_DB_ONLY) ): # in DB only
720 721 722
            if ( not self.hasTask(tasks, TaskCreateDb)): # without a create_db task
                self.assertNoTask(tasks, TaskDropDb) # we must have drop_db task
            self.hasSuccess(tasks, TaskDropSuperTable)
723
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
724 725 726 727
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
728
            # self.hasSuccess(tasks, DeleteDataTasks)
729
        else: # should be STATE_HAS_DATA
730 731
            if (not self.hasTask(tasks, TaskCreateDb) ): # only if we didn't create one
                self.assertNoTask(tasks, TaskDropDb) # we shouldn't have dropped it
732 733
            if (not self.hasTask(tasks, TaskCreateSuperTable)) :  # if we didn't create the table
                self.assertNoTask(tasks, TaskDropSuperTable) # we should not have a task that drops it            
734
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
735

736
class StateMechine:
737 738 739 740 741 742 743 744
    def __init__(self, dbConn):
        self._dbConn = dbConn
        self._curState = self._findCurrentState() # starting state
        self._stateWeights = [1,3,5,15] # transitition target probabilities, indexed with value of STATE_EMPTY, STATE_DB_ONLY, etc.
        
    def getCurrentState(self):
        return self._curState

745 746 747
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

748 749
    # May be slow, use cautionsly...
    def getTaskTypes(self): # those that can run (directly/indirectly) from the current state
750 751 752 753 754 755
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773
        allTaskClasses = StateTransitionTask.__subclasses__() # all state transition tasks
        firstTaskTypes = []
        for tc in allTaskClasses:
            # t = tc(self) # create task object            
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
        # now we have all the tasks that can begin directly from the current state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy() # have to have these
        for task1 in firstTaskTypes: # each task type gathered so far
            endState = task1.getEndState() # figure the end state
            if endState == None: # does not change end state
                continue # no use, do nothing
            for tc in allTaskClasses: # what task can further begin from there?
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
                    taskTypes.append(tc) # gather it

        if len(taskTypes) <= 0:
            raise RuntimeError("No suitable task types found for state: {}".format(self._curState))   
774
        logger.debug("[OPS] Tasks found for state {}: {}".format(self._curState, typesToStrings(taskTypes)))     
775 776 777 778 779 780 781 782 783
        return taskTypes

    def _findCurrentState(self):
        dbc = self._dbConn
        ts = time.time() # we use this to debug how fast/slow it is to do the various queries to find the current DB state
        if dbc.query("show databases") == 0 : # no database?!
            # logger.debug("Found EMPTY state")
            logger.debug("[STT] empty database found, between {} and {}".format(ts, time.time()))
            return StateEmpty()
784
        dbc.execute("use db") # did not do this when openning connection, and this is NOT the worker thread, which does this on their own
785 786 787 788 789 790 791 792 793 794 795 796 797 798 799
        if dbc.query("show tables") == 0 : # no tables
            # logger.debug("Found DB ONLY state")
            logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
            return StateDbOnly()
        if dbc.query("SELECT * FROM db.{}".format(DbManager.getFixedSuperTableName()) ) == 0 : # no regular tables
            # logger.debug("Found TABLE_ONLY state")
            logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
            return StateSuperTableOnly()
        else: # has actual tables
            # logger.debug("Found HAS_DATA state")
            logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
            return StateHasData()

    def transition(self, tasks):
        if ( len(tasks) == 0 ): # before 1st step, or otherwise empty
800
            logger.debug("[STT] Starting State: {}".format(self._curState))
801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
            return # do nothing

        self._dbConn.execute("show dnodes") # this should show up in the server log, separating steps

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in case of multiple creation and drops

        if self._curState.canDropDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in case of drop-create-drop

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not really, in case of create-drop-create

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not really in case of drop-create-drop

        # if self._state.canAddData():
        #     self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true actually

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
        self._curState.verifyTasksToState(tasks, newState) # can old state move to new state through the tasks?
        self._curState = newState

    def pickTaskType(self):
        taskTypes = self.getTaskTypes() # all the task types we can choose from at curent state
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
            if endState != None :
                weights.append(self._stateWeights[endState.getValIndex()]) # TODO: change to a method
            else:
                weights.append(10) # read data task, default to 10: TODO: change to a constant
        i = self._weighted_choice_sub(weights)
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))        
        return taskTypes[i]

    def _weighted_choice_sub(self, weights): # ref: https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
        rnd = random.random() * sum(weights) # TODO: use our dice to ensure it being determinstic?
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
852

853
# Manager of the Database Data/Connection
854
class DbManager():    
855
    def __init__(self, resetDb = True):
S
Steven Li 已提交
856
        self.tableNumQueue = LinearQueue()
857
        self._lastTick = self.setupLastTick() # datetime.datetime(2019, 1, 1) # initial date time tick
858 859
        self._lastInt  = 0 # next one is initial integer 
        self._lock = threading.RLock()
860
        
861 862
        # self.openDbServerConnection()
        self._dbConn = DbConn()
863 864 865 866
        try:
            self._dbConn.open() # may throw taos.error.ProgrammingError: disconnected
        except taos.error.ProgrammingError as err:
            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
867
            if ( err.msg == 'client disconnected' ): # cannot open DB connection
868 869 870 871 872
                print("Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
                sys.exit()
            else:
                raise            
        except:
S
Steven Li 已提交
873
            print("[=] Unexpected exception")
874
            raise        
875 876 877

        if resetDb :
            self._dbConn.resetDb() # drop and recreate DB            
878

879 880
        self._stateMachine = StateMechine(self._dbConn) # Do this after dbConn is in proper shape
        
881 882 883
    def getDbConn(self):
        return self._dbConn

884
    def getStateMachine(self) -> StateMechine :
885 886 887 888
        return self._stateMachine

    # def getState(self):
    #     return self._stateMachine.getCurrentState()
889 890 891 892 893 894

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
895
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
896
    def setupLastTick(self):
897
        t1 = datetime.datetime(2020, 6, 1)
898
        t2 = datetime.datetime.now()
899 900
        elSec = int(t2.timestamp() - t1.timestamp()) # maybe a very large number, takes 69 years to exceed Python int range
        elSec2 = (  elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500 ) ) * 500 # a number representing seconds within 10 years
901 902
        # print("elSec = {}".format(elSec))
        t3 = datetime.datetime(2012, 1, 1) # default "keep" is 10 years
903
        t4 = datetime.datetime.fromtimestamp( t3.timestamp() + elSec2) # see explanation above
904 905 906
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

S
Steven Li 已提交
907 908 909
    def pickAndAllocateTable(self): # pick any table, and "use" it
        return self.tableNumQueue.pickAndAllocate()

910 911 912 913 914
    def addTable(self):
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

915 916
    @classmethod
    def getFixedSuperTableName(cls):
917
        return "fs_table"
918

S
Steven Li 已提交
919 920 921
    def releaseTable(self, i): # return the table back, so others can use it
        self.tableNumQueue.release(i)

922
    def getNextTick(self):
923 924 925
        with self._lock: # prevent duplicate tick
            self._lastTick += datetime.timedelta(0, 1) # add one second to it
            return self._lastTick
926 927

    def getNextInt(self):
928 929 930
        with self._lock:
            self._lastInt += 1
            return self._lastInt
931 932

    def getNextBinary(self):
933
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(self.getNextInt())
934 935 936

    def getNextFloat(self):
        return 0.9 + self.getNextInt()
937
    
S
Steven Li 已提交
938
    def getTableNameToDelete(self):
939
        tblNum = self.tableNumQueue.pop() # TODO: race condition!
940 941 942
        if ( not tblNum ): # maybe false
            return False
        
S
Steven Li 已提交
943 944
        return "table_{}".format(tblNum)

945 946 947
    def cleanUp(self):
        self._dbConn.close()      

948 949 950 951
class TaskExecutor():
    def __init__(self, curStep):
        self._curStep = curStep

952 953 954
    def getCurStep(self):
        return self._curStep

955 956
    def execute(self, task: Task, wt: WorkerThread): # execute a task on a thread
        task.execute(wt)
957

958 959
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
960

961 962
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
963

S
Steven Li 已提交
964
class Task():
965 966 967 968
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Steven Li 已提交
969 970 971
        Task.taskSn += 1 # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn
972

973
    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):        
974
        self._dbManager = dbManager
975
        self._workerThread = None 
976
        self._err = None
977
        self._curStep = None
978
        self._numRows = None # Number of rows affected
979 980 981

        # Assign an incremental task serial number        
        self._taskNum = self.allocTaskNum()
S
Steven Li 已提交
982
        # logger.debug("Creating new task {}...".format(self._taskNum))
983

984
        self._execStats = execStats
985
        self._lastSql = "" # last SQL executed/attempted
986

987 988
    def isSuccess(self):
        return self._err == None
989

990
    def clone(self): # TODO: why do we need this again?
991
        newTask = self.__class__(self._dbManager, self._execStats)
992 993 994
        return newTask

    def logDebug(self, msg):
S
Steven Li 已提交
995
        self._workerThread.logDebug("Step[{}.{}] {}".format(self._curStep, self._taskNum, msg))
996 997

    def logInfo(self, msg):
S
Steven Li 已提交
998
        self._workerThread.logInfo("Step[{}.{}] {}".format(self._curStep, self._taskNum, msg))
999

1000
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1001
        raise RuntimeError("To be implemeted by child classes, class name: {}".format(self.__class__.__name__))
1002

1003 1004
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
1005
        self._workerThread = wt # type: ignore
1006 1007

        te = wt.getTaskExecutor()
1008 1009
        self._curStep = te.getCurStep()
        self.logDebug("[-] executing task {}...".format(self.__class__.__name__))
1010 1011

        self._err = None
1012
        self._execStats.beginTaskType(self.__class__.__name__) # mark beginning
1013 1014 1015
        try:
            self._executeInternal(te, wt) # TODO: no return value?
        except taos.error.ProgrammingError as err:
1016
            errno2 = 0x80000000 + err.errno # positive error number
1017
            if ( errno2 in [0x200, 0x360, 0x362, 0x36A, 0x36B, 0x36D, 0x381, 0x380, 0x383, 0x503, 0x600 ]) : # allowed errors
1018
                self.logDebug("[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, self._lastSql))
1019
                print("_", end="", flush=True)
1020 1021
                self._err = err  
            else:
1022 1023 1024 1025 1026
                errMsg = "[=] Unexpected Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, self._lastSql)
                self.logDebug(errMsg)
                if gConfig.debug :
                    raise # so that we see full stack
                else: # non-debug
1027 1028
                    print("\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
                        "----------------------------\n")
1029
                    sys.exit(-1)
1030
        except:
1031
            self.logDebug("[=] Unexpected exception, SQL: {}".format(self._lastSql))
1032
            raise
1033
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
1034
        
1035 1036
        self.logDebug("[X] task execution completed, {}, status: {}".format(self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))        
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess()) # TODO: merge with above.
S
Steven Li 已提交
1037

1038
    def execSql(self, sql):
1039
        self._lastSql = sql
1040
        return self._dbManager.execute(sql)
1041

1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053
    def execWtSql(self, wt: WorkerThread, sql): # execute an SQL on the worker thread
        self._lastSql = sql
        return wt.execSql(sql)

    def queryWtSql(self, wt: WorkerThread, sql): # execute an SQL on the worker thread
        self._lastSql = sql
        return wt.querySql(sql)

    def getQueryResult(self, wt: WorkerThread): # execute an SQL on the worker thread
        return wt.getQueryResult()


1054
                  
1055
class ExecutionStats:
1056 1057 1058 1059 1060
    def __init__(self):
        self._execTimes: Dict[str, [int, int]] = {} # total/success times for a task
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1061 1062
        self._execStartTime = None
        self._elapsedTime = 0.0 # total elapsed time
1063 1064
        self._accRunTime = 0.0 # accumulated run time

1065 1066 1067 1068 1069 1070 1071 1072 1073
        self._failed = False
        self._failureReason = None

    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
    def incExecCount(self, klassName, isSuccess): # TODO: add a lock here
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
        t = self._execTimes[klassName] # tuple for the data
        t[0] += 1 # index 0 has the "total" execution times
        if isSuccess:
            t[1] += 1 # index 1 has the "success" execution times

    def beginTaskType(self, klassName):
        with self._lock:
            if self._tasksInProgress == 0 : # starting a new round
                self._firstTaskStartTime = time.time() # I am now the first task
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
            if self._tasksInProgress == 0 : # all tasks have stopped
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1095 1096 1097 1098
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1099
    def logStats(self):
1100 1101 1102 1103
        logger.info("----------------------------------------------------------------------")
        logger.info("| Crash_Gen test {}, with the following stats:".
            format("FAILED (reason: {})".format(self._failureReason) if self._failed else "SUCCEEDED"))
        logger.info("| Task Execution Times (success/total):")
1104 1105
        execTimesAny = 0
        for k, n in self._execTimes.items():            
1106
            execTimesAny += n[0]
1107
            logger.info("|    {0:<24}: {1}/{2}".format(k,n[1],n[0]))
1108
                
1109 1110 1111 1112 1113 1114 1115
        logger.info("| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info("| Total Tasks In Progress at End: {}".format(self._tasksInProgress))
        logger.info("| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(self._accRunTime))
        logger.info("| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime/execTimesAny))
        logger.info("| Total Elapsed Time (from wall clock): {:.3f} seconds".format(self._elapsedTime))
        logger.info("----------------------------------------------------------------------")
        
1116 1117 1118


class StateTransitionTask(Task):
1119 1120 1121 1122 1123
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1124 1125 1126 1127
    @classmethod
    def getInfo(cls): # each sub class should supply their own information
        raise RuntimeError("Overriding method expected")

1128 1129 1130 1131 1132
    _endState = None 
    @classmethod
    def getEndState(cls): # TODO: optimize by calling it fewer times
        raise RuntimeError("Overriding method expected")

1133 1134 1135
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1136

1137 1138 1139
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1140 1141

    @classmethod
1142 1143 1144
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1145

1146 1147 1148 1149
    @classmethod
    def getRegTableName(cls, i):
        return "db.reg_table_{}".format(i)

1150 1151 1152
    def execute(self, wt: WorkerThread):
        super().execute(wt)
        
1153
class TaskCreateDb(StateTransitionTask):
1154
    @classmethod
1155 1156
    def getEndState(cls):
        return StateDbOnly() 
1157

1158 1159 1160 1161
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1162
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1163
        self.execWtSql(wt, "create database db")       
1164

1165
class TaskDropDb(StateTransitionTask):
1166
    @classmethod
1167 1168
    def getEndState(cls):
        return StateEmpty()
1169

1170 1171 1172 1173
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1174
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1175
        self.execWtSql(wt, "drop database db")
S
Steven Li 已提交
1176
        logger.debug("[OPS] database dropped at {}".format(time.time()))
1177

1178
class TaskCreateSuperTable(StateTransitionTask):
1179
    @classmethod
1180 1181
    def getEndState(cls):
        return StateSuperTableOnly()
1182

1183 1184
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1185
        return state.canCreateFixedSuperTable()
1186

1187
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1188 1189 1190 1191 1192 1193 1194
        if not wt.dbInUse(): # no DB yet, to the best of our knowledge
            logger.debug("Skipping task, no DB yet")
            return

        tblName = self._dbManager.getFixedSuperTableName()   
        # wt.execSql("use db")    # should always be in place
        self.execWtSql(wt, "create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
1195 1196
        # No need to create the regular tables, INSERT will do that automatically

S
Steven Li 已提交
1197

1198
class TaskReadData(StateTransitionTask):
1199
    @classmethod
1200 1201
    def getEndState(cls):
        return None # meaning doesn't affect state
1202

1203 1204 1205 1206
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1207
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1208
        sTbName = self._dbManager.getFixedSuperTableName()        
1209 1210
        self.queryWtSql(wt, "select TBNAME from db.{}".format(sTbName)) # TODO: analyze result set later

1211
        if random.randrange(5) == 0 : # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
1212 1213
            wt.getDbConn().close()
            wt.getDbConn().open()
1214
        else:
1215
            rTables = self.getQueryResult(wt) # wt.getDbConn().getQueryResult()
1216 1217
            # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
            for rTbName in rTables : # regular tables
1218
                self.execWtSql(wt, "select * from db.{}".format(rTbName[0]))
1219

1220 1221
        # tdSql.query(" cars where tbname in ('carzero', 'carone')")

1222
class TaskDropSuperTable(StateTransitionTask):
1223
    @classmethod
1224 1225
    def getEndState(cls):
        return StateDbOnly() 
1226

1227 1228
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1229
        return state.canDropFixedSuperTable()
1230

1231
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1232 1233 1234 1235 1236
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized sequence
        if Dice.throw(2) == 0 : 
            tblSeq = list(range(2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))) 
            random.shuffle(tblSeq) 
            tickOutput = False # if we have spitted out a "d" character for "drop regular table"
1237
            isSuccess = True
1238
            for i in tblSeq: 
1239
                regTableName = self.getRegTableName(i); # "db.reg_table_{}".format(i)                  
1240
                try:
1241
                    self.execWtSql(wt, "drop table {}".format(regTableName)) # nRows always 0, like MySQL
1242 1243
                except taos.error.ProgrammingError as err:
                    errno2 = 0x80000000 + err.errno # positive error number
1244 1245
                    if ( errno2 in [0x362]) : # mnode invalid table name
                        isSuccess = False
1246
                        logger.debug("[DB] Acceptable error when dropping a table")
1247
                    continue # try to delete next regular table
1248 1249 1250

                if (not tickOutput):
                    tickOutput = True # Print only one time
1251
                    if isSuccess :
1252 1253
                        print("d", end="", flush=True)
                    else:
1254
                        print("f", end="", flush=True)                    
1255 1256 1257 1258

        # Drop the super table itself
        tblName = self._dbManager.getFixedSuperTableName()     
        self.execWtSql(wt, "drop table db.{}".format(tblName))
1259

1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
        return None # meaning doesn't affect state

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropFixedSuperTable() # if we can drop it, we can alter tags

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        tblName = self._dbManager.getFixedSuperTableName()   
        dice = Dice.throw(4)
        if dice == 0 :
1273
            sql = "alter table db.{} add tag extraTag int".format(tblName)
1274
        elif dice == 1 :
1275
            sql = "alter table db.{} drop tag extraTag".format(tblName)
1276
        elif dice == 2 :
1277
            sql = "alter table db.{} drop tag newTag".format(tblName)
1278
        else: # dice == 3
1279 1280 1281
            sql = "alter table db.{} change tag extraTag newTag".format(tblName)

        self.execWtSql(wt, sql)
1282

1283
class TaskAddData(StateTransitionTask):
1284
    activeTable : Set[int] = set() # Track which table is being actively worked on
1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298

    # We use these two files to record operations to DB, useful for power-off tests
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
        if gConfig.record_ops :            
            if ( cls.fAddLogReady == None ):
                logger.info("Recording in a file operations to be performed...")
                cls.fAddLogReady = open("add_log_ready.txt", "w")
            if ( cls.fAddLogDone == None ):
                logger.info("Recording in a file operations completed...")
                cls.fAddLogDone = open("add_log_done.txt", "w")
1299

1300
    @classmethod
1301 1302
    def getEndState(cls):
        return StateHasData()
1303 1304 1305 1306

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
1307 1308
        
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1309
        ds = self._dbManager
1310
        # wt.execSql("use db") # TODO: seems to be an INSERT bug to require this
1311
        tblSeq = list(range(self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)) 
1312 1313 1314 1315 1316 1317 1318 1319
        random.shuffle(tblSeq) 
        for i in tblSeq: 
            if ( i in self.activeTable ): # wow already active
                # logger.info("Concurrent data insertion into table: {}".format(i))      
                # print("ct({})".format(i), end="", flush=True) # Concurrent insertion into table
                print("x", end="", flush=True)
            else:
                self.activeTable.add(i) # marking it active
1320 1321
            # No need to shuffle data sequence, unless later we decide to do non-increment insertion      
            regTableName = self.getRegTableName(i); # "db.reg_table_{}".format(i)      
1322
            for j in range(self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS) : # number of records per table
1323
                nextInt = ds.getNextInt()                
1324 1325 1326 1327 1328 1329 1330
                if gConfig.record_ops:
                    self.prepToRecordOps()
                    self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName))
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
                sql = "insert into {} using {} tags ('{}', {}) values ('{}', {});".format(
                    regTableName, 
1331 1332
                    ds.getFixedSuperTableName(), 
                    ds.getNextBinary(), ds.getNextFloat(),
1333
                    ds.getNextTick(), nextInt)
1334
                self.execWtSql(wt, sql) 
1335 1336 1337 1338
                if gConfig.record_ops:
                    self.fAddLogDone.write("Wrote {} to {}\n".format(nextInt, regTableName))
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
1339
            self.activeTable.discard(i) # not raising an error, unlike remove
1340 1341


S
Steven Li 已提交
1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363
# Deterministic random number generator
class Dice():
    seeded = False # static, uninitialized

    @classmethod
    def seed(cls, s): # static
        if (cls.seeded):
            raise RuntimeError("Cannot seed the random generator more than once")
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
    def verifyRNG(cls): # Verify that the RNG is determinstic
        random.seed(0)
        x1 = random.randrange(0, 1000)
        x2 = random.randrange(0, 1000)
        x3 = random.randrange(0, 1000)
        if ( x1 != 864 or x2!=394 or x3!=776 ):
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
1364 1365
    def throw(cls, stop): # get 0 to stop-1
        return cls.throwRange(0, stop)
S
Steven Li 已提交
1366 1367

    @classmethod
1368
    def throwRange(cls, start, stop): # up to stop-1
S
Steven Li 已提交
1369 1370
        if ( not cls.seeded ):
            raise RuntimeError("Cannot throw dice before seeding it")
1371
        return random.randrange(start, stop)
S
Steven Li 已提交
1372 1373 1374


# Anyone needing to carry out work should simply come here
1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396
# class WorkDispatcher():
#     def __init__(self, dbState):
#         # self.totalNumMethods = 2
#         self.tasks = [
#             # CreateTableTask(dbState), # Obsolete
#             # DropTableTask(dbState),
#             # AddDataTask(dbState),
#         ]

#     def throwDice(self):
#         max = len(self.tasks) - 1 
#         dRes = random.randint(0, max)
#         # logger.debug("Threw the dice in range [{},{}], and got: {}".format(0,max,dRes))
#         return dRes

#     def pickTask(self):
#         dice = self.throwDice()
#         return self.tasks[dice]

#     def doWork(self, workerThread):
#         task = self.pickTask()
#         task.execute(workerThread)
S
Steven Li 已提交
1397

S
Steven Li 已提交
1398 1399
class LoggingFilter(logging.Filter):
    def filter(self, record: logging.LogRecord):
S
Steven Li 已提交
1400 1401 1402
        if ( record.levelno >= logging.INFO ) :
            return True # info or above always log

1403 1404

        
S
Steven Li 已提交
1405 1406
        # print("type = {}, value={}".format(type(msg), msg))
        # sys.exit()
S
Steven Li 已提交
1407 1408 1409 1410 1411

        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
S
Steven Li 已提交
1412 1413
        return True

1414 1415 1416 1417 1418
class MyLoggingAdapter(logging.LoggerAdapter):    
    def process(self, msg, kwargs):
        return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
        # return '[%s] %s' % (self.extra['connid'], msg), kwargs

1419 1420 1421 1422 1423 1424 1425 1426 1427
class MainExec:
    @classmethod
    def runClient(cls):
        # resetDb = False # DEBUG only
        # dbState = DbState(resetDb)  # DBEUG only!
        dbManager = DbManager() # Regular function
        Dice.seed(0) # initial seeding of dice
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
        tc = ThreadCoordinator(thPool, dbManager)
S
Steven Li 已提交
1428
        
1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479
        tc.run()
        tc.logStats()
        dbManager.cleanUp()    

    @classmethod
    def runService(cls):
        print("Running service...")

    @classmethod
    def runTemp(cls): # for debugging purposes
        # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
        # sTbName = dbState.getFixedSuperTableName()   
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
        #     dbc.execute("use db")     

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))
        
        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
        #                 rTbName[0],                    
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
        #         for rTbName in rTables : # regular tables        
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
        #         logger.info("Initial READING operation is successful")       
        #     except taos.error.ProgrammingError as err:
        #         logger.info("Initial WRITE/READ error: {}".format(err))   
        
        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
        #     rows = dbc.query("show databases") 
        #     print("Rows: {}, time={}".format(rows, time.time()))
        return 
S
Steven Li 已提交
1480

1481
def main():
1482
    # Super cool Python argument library: https://docs.python.org/3/library/argparse.html
1483 1484 1485 1486 1487 1488 1489 1490 1491
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
1492

1493 1494
    parser.add_argument('-d', '--debug', action='store_true',                        
                        help='Turn on DEBUG mode for more logging (default: false)')
1495 1496
    parser.add_argument('-e', '--run-tdengine', action='store_true',                        
                        help='Run TDengine service in foreground (default: false)')
1497 1498
    parser.add_argument('-l', '--larger-data', action='store_true',                        
                        help='Write larger amount of data during write operations (default: false)')
1499
    parser.add_argument('-p', '--per-thread-db-connection', action='store_true',                        
1500
                        help='Use a single shared db connection (default: false)')
1501 1502
    parser.add_argument('-r', '--record-ops', action='store_true',                        
                        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')                    
1503
    parser.add_argument('-s', '--max-steps', action='store', default=1000, type=int,
1504
                        help='Maximum number of steps to run (default: 100)')
1505
    parser.add_argument('-t', '--num-threads', action='store', default=5, type=int,
1506
                        help='Number of threads to run (default: 10)')
1507

1508
    global gConfig
1509
    gConfig = parser.parse_args()
1510

1511 1512 1513
    # if len(sys.argv) == 1:
    #     parser.print_help()
    #     sys.exit()
1514
 
1515
    # Logging Stuff
1516
    global logger
1517 1518 1519 1520 1521 1522 1523
    _logger = logging.getLogger('CrashGen') # real logger
    _logger.addFilter(LoggingFilter()) 
    ch = logging.StreamHandler()
    _logger.addHandler(ch)

    logger = MyLoggingAdapter(_logger, []) # Logging adapter, to be used as a logger

1524 1525
    if ( gConfig.debug ):
        logger.setLevel(logging.DEBUG) # default seems to be INFO        
S
Steven Li 已提交
1526 1527
    else:
        logger.setLevel(logging.INFO)
1528 1529
    
    # Run server or client
1530 1531 1532 1533
    if gConfig.run_tdengine : # run server
        MainExec.runService()
    else :
        MainExec.runClient()
1534

1535
    
S
Steven Li 已提交
1536
    # logger.info("Crash_Gen execution finished")
1537 1538 1539

if __name__ == "__main__":
    main()