#!/usr/bin/python3.7
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
14 15
from __future__ import annotations  # For type hinting before definition, ref: https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel    

S
Steven Li 已提交
16
import sys
17
import traceback
18 19 20 21
# This script only supports Python 3; bail out early on anything older.
if sys.version_info.major < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
22
import getopt
23
import argparse
24
import copy
S
Steven Li 已提交
25 26 27

import threading
import random
28
import time
S
Steven Li 已提交
29
import logging
30
import datetime
31
import textwrap
S
Steven Li 已提交
32

33
from typing import List
34
from typing import Dict
35
from typing import Set
36

S
Steven Li 已提交
37 38 39 40 41
from util.log import *
from util.dnodes import *
from util.cases import *
from util.sql import *

42
import crash_gen
S
Steven Li 已提交
43 44
import taos

45
# Global variables; the intent is to keep their number small.

# Command-line/environment configuration, filled in a bit later.
gConfig = argparse.Namespace()  # placeholder value, to be replaced later
logger = None
S
Steven Li 已提交
51

52 53
def runThread(wt: WorkerThread):
    """Thread entry point: delegate to the given worker's ``run()`` method."""
    wt.run()
54

55 56 57 58 59 60 61 62
class CrashGenError(Exception):
    """Exception raised by the crash generator for its own error conditions.

    Attributes:
        msg: description of the error, or None when not supplied.
        errno: optional error number supplied by the raiser, or None.
    """

    def __init__(self, msg=None, errno=None):
        self.msg = msg
        self.errno = errno

    def __str__(self):
        # __str__ must return a str; msg defaults to None, which would
        # otherwise make str(err) raise a TypeError.
        if self.msg is None:
            return ""
        return str(self.msg)

S
Steven Li 已提交
63
class WorkerThread:
    """A worker that runs tasks step by step under a ThreadCoordinator.

    Each instance owns a ``threading.Thread`` (whose target is ``runThread``)
    and a private "step gate" (a ``threading.Event``).  In every iteration of
    its loop the worker first crosses the coordinator's shared barrier, then
    waits at its own gate until the main thread taps it, and finally fetches
    and executes one task.

    When ``gConfig.per_thread_db_connection`` is set the worker uses its own
    ``DbConn``; otherwise it uses the connection held by the coordinator's
    DB state.
    """

    def __init__(self, pool: ThreadPool, tid,
                 tc: ThreadCoordinator,
                 ):  # note: main thread context!
        """Create the worker; the underlying thread is NOT started here.

        Args:
            pool: the ThreadPool this worker belongs to.
            tid: identifier of this worker, used in log messages.
            tc: the ThreadCoordinator driving the steps.
        """
        self._pool = pool
        self._tid = tid
        self._tc = tc
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()

        # Always define the attribute so it exists in both configurations.
        self._dbConn = None
        # Let us have a DB connection of our own
        if gConfig.per_thread_db_connection:  # type: ignore
            self._dbConn = DbConn()

    def logDebug(self, msg):
        """Log ``msg`` at DEBUG level, prefixed with this worker's id."""
        logger.debug("    TRD[{}] {}".format(self._tid, msg))

    def logInfo(self, msg):
        """Log ``msg`` at INFO level, prefixed with this worker's id."""
        logger.info("    TRD[{}] {}".format(self._tid, msg))

    def getTaskExecutor(self):
        """Return the coordinator's current task executor."""
        return self._tc.getTaskExecutor()

    def start(self):
        """Start the underlying thread."""
        self._thread.start()  # AFTER the thread is recorded

    def run(self):
        """Body of the worker thread: open the DB, loop over tasks, clean up.

        The per-thread connection (if any) is closed even when the task loop
        raises, so a failing task does not leak the connection.
        """
        # initialization after thread starts, in the thread context
        logger.info("Starting to run thread: {}".format(self._tid))

        usePrivateConn = gConfig.per_thread_db_connection  # type: ignore
        if usePrivateConn:
            self._dbConn.open()

        try:
            self._doTaskLoop()
        finally:
            # clean up
            if usePrivateConn:
                self._dbConn.close()

    def _doTaskLoop(self):
        """Run one task per step until the coordinator stops running."""
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
            tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
            logger.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid))
            self.crossStepGate()  # then per-thread gate, after being tapped
            logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
            if not self._tc.isRunning():
                logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
                break

            logger.debug("[TRD] Worker thread [{}] about to fetch task".format(self._tid))
            task = tc.fetchTask()
            logger.debug("[TRD] Worker thread [{}] about to execute task: {}".format(self._tid, task.__class__.__name__))
            task.execute(self)
            tc.saveExecutedTask(task)
            logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))

    def verifyThreadSelf(self):  # ensure we are called by this own thread
        """Raise RuntimeError unless called from this worker's own thread."""
        if threading.get_ident() != self._thread.ident:
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadMain(self):  # ensure we are called by the main thread
        """Raise RuntimeError unless called from the main thread."""
        if threading.get_ident() != threading.main_thread().ident:
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
        """Raise RuntimeError if the underlying thread is not alive."""
        if not self._thread.is_alive():
            raise RuntimeError("Unexpected dead thread")

    # A gate is different from a barrier in that a thread needs to be "tapped"
    def crossStepGate(self):
        """Block at this worker's gate until tapped, then re-arm the gate.

        Must be called from the worker's own thread.
        """
        self.verifyThreadAlive()
        self.verifyThreadSelf()  # only allowed by ourselves

        # Wait again at the "gate", waiting to be "tapped"
        logger.debug("[TRD] Worker thread {} about to cross the step gate".format(self._tid))
        self._stepGate.wait()
        self._stepGate.clear()

    def tapStepGate(self):  # give it a tap, release the thread waiting there
        """Release the worker waiting at its gate; main thread only."""
        self.verifyThreadAlive()
        self.verifyThreadMain()  # only allowed for main thread

        logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
        self._stepGate.set()  # wake up!
        time.sleep(0)  # let the released thread run a bit

    def execSql(self, sql):  # TODO: expose DbConn directly
        """Execute ``sql`` on the connection chosen by ``getDbConn()``."""
        return self.getDbConn().execute(sql)

    def getDbConn(self):
        """Return the per-thread connection, or the shared one from the DB state."""
        if gConfig.per_thread_db_connection:
            return self._dbConn
        return self._tc.getDbState().getDbConn()
176

177
class ThreadCoordinator:
    """Drives all worker threads through the test, one step at a time.

    The main thread and the workers meet at a shared ``threading.Barrier``
    (sized ``numThreads + 1``).  After each meeting the main thread asks the
    DB state to transition based on the tasks executed in the step, prepares
    a new TaskExecutor, and then taps every worker's gate.  ``_te`` being
    ``None`` is the signal to workers that the coordinator is no longer
    running.
    """

    def __init__(self, pool, dbState):
        """
        Args:
            pool: the thread pool; must expose ``numThreads``, ``threadList``,
                ``createAndStartThreads`` and ``joinAll``.
            dbState: the DB state object used to pick tasks and to
                transition at the end of every step.
        """
        self._curStep = -1  # first step is 0
        self._pool = pool
        self._te = None  # prepare for every new step
        self._dbState = dbState
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things

        self._stepBarrier = threading.Barrier(self._pool.numThreads + 1)  # one barrier for all threads
        self._execStats = ExecutionStats()

    def getTaskExecutor(self):
        """Return the task executor of the current step (None when not running)."""
        return self._te

    def getDbState(self) -> DbState:
        """Return the DB state object given at construction."""
        return self._dbState

    def crossStepBarrier(self):
        """Wait at the barrier shared by the main thread and all workers."""
        self._stepBarrier.wait()

    def run(self):
        """Start the workers and coordinate them for ``gConfig.max_steps`` steps.

        A ``taos.error.ProgrammingError`` whose ``msg`` is
        ``'network unavailable'`` ends the run early and is registered as a
        failure; any other ``ProgrammingError`` is re-raised.
        """
        self._pool.createAndStartThreads(self)

        # Coordinate all threads step by step
        self._curStep = -1  # not started yet
        maxSteps = gConfig.max_steps  # type: ignore
        self._execStats.startExec()  # start the stop watch
        failed = False
        while self._curStep < maxSteps - 1 and not failed:  # maxSteps==10, last curStep should be 9
            if not gConfig.debug:
                print(".", end="", flush=True)  # print this only if we are not in debug mode
            logger.debug("[TRD] Main thread going to sleep")

            # Now ready to enter a step
            self.crossStepBarrier()  # let other threads go past the pool barrier, but wait at the thread gate
            self._stepBarrier.reset()  # Other worker threads should now be at the "gate"

            # At this point, all threads should be past the overall "barrier" and before the per-thread "gate"
            try:
                self._dbState.transition(self._executedTasks)  # at end of step, transition the DB state
            except taos.error.ProgrammingError as err:
                if err.msg == 'network unavailable':  # broken DB connection
                    logger.info("DB connection broken, execution failed")
                    traceback.print_stack()
                    failed = True
                    self._te = None  # Not running any more
                    self._execStats.registerFailure("Broken DB Connection")
                    # no "continue" here: all threads still need to be tapped below so they can stop
                else:
                    raise

            self.resetExecutedTasks()  # clear the tasks after we are done

            # Get ready for next step
            logger.debug("<-- Step {} finished".format(self._curStep))
            self._curStep += 1  # we are about to get into next step. TODO: race condition here!
            logger.debug("\r\n--> Step {} starts with main thread waking up".format(self._curStep))  # Now not all threads had time to go to sleep

            # A new TE for the new step
            if not failed:  # only if not failed
                self._te = TaskExecutor(self._curStep)

            logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(self._curStep))  # Now not all threads had time to go to sleep
            self.tapAllThreads()

        logger.debug("Main thread ready to finish up...")
        if not failed:  # only in regular situations
            self.crossStepBarrier()  # Cross it one last time, after all threads finish
            self._stepBarrier.reset()
            logger.debug("Main thread in exclusive zone...")
            self._te = None  # No more executor, time to end
            logger.debug("Main thread tapping all threads one last time...")
            self.tapAllThreads()  # Let the threads run one last time

        logger.debug("Main thread joining all threads")
        self._pool.joinAll()  # Get all threads to finish
        logger.info("All worker thread finished")
        self._execStats.endExec()

    def logStats(self):
        """Log the collected execution statistics."""
        self._execStats.logStats()

    def tapAllThreads(self):  # in a deterministic manner
        """Tap every worker's gate, in an order shuffled via ``Dice``."""
        wakeSeq = []
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
        logger.debug("[TRD] Main thread waking up worker thread: {}".format(str(wakeSeq)))
        # TODO: set dice seed to a deterministic value
        for i in wakeSeq:
            self._pool.threadList[i].tapStepGate()  # TODO: maybe a bit too deep?!
            time.sleep(0)  # yield

    def isRunning(self):
        """Return True while a task executor is set for the current step."""
        return self._te is not None

    def fetchTask(self) -> Task:
        """Create a task of a type picked by the DB state for its current state.

        Raises:
            RuntimeError: if the coordinator is not running.
        """
        if not self.isRunning():  # no task
            raise RuntimeError("Cannot fetch task when not running")
        taskType = self.getDbState().pickTaskType()  # pick a task type for current state
        return taskType(self.getDbState(), self._execStats)  # create a task from it

    def resetExecutedTasks(self):
        """Forget the tasks recorded for the step; not protected by the lock."""
        self._executedTasks = []  # should be under single thread

    def saveExecutedTask(self, task):
        """Record ``task`` as executed in the current step (under the lock)."""
        with self._lock:
            self._executedTasks.append(task)
300 301

# We define a class to run a number of threads in lock step.
302
class ThreadPool:
    """Holds the worker threads and the parameters they were created with."""

    def __init__(self, dbState, numThreads, maxSteps, funcSequencer):
        """Store the pool parameters; ``dbState`` is accepted but not kept."""
        # Internal bookkeeping
        self.threadList = []
        self.curStep = 0
        # Parameters supplied by the caller
        self.funcSequencer = funcSequencer
        self.maxSteps = maxSteps
        self.numThreads = numThreads

    # starting to run all the threads, in lock step
    def createAndStartThreads(self, tc: ThreadCoordinator):
        """Create ``numThreads`` workers bound to ``tc`` and start each one."""
        for workerId in range(self.numThreads):
            worker = WorkerThread(self, workerId, tc)
            self.threadList.append(worker)
            worker.start()  # start, but should block immediately before step 0

    def joinAll(self):
        """Join the underlying thread of every worker in the pool."""
        for worker in self.threadList:
            logger.debug("Joining thread...")
            worker._thread.join()

S
Steven Li 已提交
326 327 328
# A queue of contiguous POSITIVE integers
class LinearQueue:
    """A queue of contiguous positive integers with "in use" tracking.

    The queue covers the indexes ``firstIndex..lastIndex`` (inclusive) and is
    empty when ``firstIndex > lastIndex``.  ``inUse`` holds the indexes that
    are currently allocated; an index that is in use cannot be popped or
    allocated again until it is released.
    """

    def __init__(self):
        self.firstIndex = 1  # 1st ever element
        self.lastIndex = 0
        self._lock = threading.RLock()  # re-entrant: our functions may call each other
        self.inUse = set()  # the indexes that are in use right now

    def toText(self):
        """Return a readable summary of the index range and the in-use set."""
        return "[{}..{}], in use: {}".format(self.firstIndex, self.lastIndex, self.inUse)

    def push(self):
        """Append a new (largest) index, mark it in use, and return it."""
        with self._lock:
            self.lastIndex += 1
            self.allocate(self.lastIndex)
            return self.lastIndex

    def pop(self):
        """Remove and return the smallest index.

        Returns:
            The popped index, or ``False`` when the queue is empty or the
            smallest index is currently in use.  TODO: None?
        """
        with self._lock:
            if self.isEmpty():
                return False

            index = self.firstIndex
            if index in self.inUse:
                return False

            self.firstIndex += 1
            return index

    def isEmpty(self):
        """Return True when the queue holds no index."""
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
        """Like ``pop()``, but return ``0`` when the queue is empty."""
        with self._lock:
            if self.isEmpty():
                return 0
            return self.pop()

    def allocate(self, i):
        """Mark index ``i`` as in use.

        Raises:
            RuntimeError: if ``i`` is already in use.
        """
        with self._lock:
            if i in self.inUse:
                raise RuntimeError("Cannot re-use same index in queue: {}".format(i))
            self.inUse.add(i)

    def release(self, i):
        """Mark index ``i`` as no longer in use.

        Raises:
            KeyError: if ``i`` is not currently in use.  TODO: why does that happen?
        """
        with self._lock:
            self.inUse.remove(i)

    def size(self):
        """Return the number of indexes currently in the queue."""
        return self.lastIndex + 1 - self.firstIndex

    def pickAndAllocate(self):
        """Pick a random index that is not in use, allocate it, and return it.

        Returns:
            The allocated index, or ``None`` when the queue is empty or no
            free index was found within ``10 * size()`` random attempts.
        """
        with self._lock:
            # Check emptiness under the lock so that the check and the
            # allocation see the same queue state.
            if self.isEmpty():
                return None
            for _ in range(self.size() * 10):
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if ret not in self.inUse:
                    self.allocate(ret)
                    return ret
            return None

class DbConn:
    """Thin wrapper around a ``taos`` connection, its cursor and a ``TDSql`` helper.

    Every operation except ``open()`` and ``getQueryResult()`` raises
    ``RuntimeError`` when the connection is not open.
    """

    def __init__(self):
        self._conn = None
        self._cursor = None
        self._tdSql = None  # created in open()
        self.isOpen = False

    def open(self, host="127.0.0.1", cfgPath="../../build/test/cfg"):  # Open connection
        """Connect to the server and get the cursor/TDSql helper ready.

        Args:
            host: server host passed to ``taos.connect``.
            cfgPath: client configuration directory passed to ``taos.connect``.

        Raises:
            RuntimeError: if the connection is already open.
        """
        if self.isOpen:
            raise RuntimeError("Cannot re-open an existing DB connection")

        self._conn = taos.connect(host=host, config=cfgPath)
        self._cursor = self._conn.cursor()

        # Get the connection/cursor ready
        self._cursor.execute('reset query cache')
        # note: 'use db' is not issued here

        self._tdSql = TDSql()
        self._tdSql.init(self._cursor)
        self.isOpen = True

    def resetDb(self):  # reset the whole database, etc.
        """Drop the database ``db`` if it exists; it is not re-created here."""
        if not self.isOpen:
            raise RuntimeError("Cannot reset database until connection is open")

        self._cursor.execute('drop database if exists db')
        logger.debug("Resetting DB, dropped database")

    def close(self):
        """Close the TDSql helper and mark the connection as closed."""
        if not self.isOpen:
            raise RuntimeError("Cannot clean up database until connection is open")
        self._tdSql.close()
        self.isOpen = False

    def execute(self, sql):
        """Run ``sql`` through ``TDSql.execute`` and return its result (logged as nRows)."""
        if not self.isOpen:
            raise RuntimeError("Cannot execute database commands until connection is open")
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
        logger.debug("[SQL] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
        return nRows

    def query(self, sql):
        """Run ``sql`` through ``TDSql.query`` and return its result (logged as nRows).

        The rows themselves are available afterwards via ``getQueryResult()``.
        """
        if not self.isOpen:
            raise RuntimeError("Cannot query database until connection is open")
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
        logger.debug("[SQL] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
        return nRows

    def getQueryResult(self):
        """Return ``queryResult`` of the TDSql helper (set by the last query)."""
        return self._tdSql.queryResult

    def _queryAny(self, sql):
        """Run a query expected to yield exactly one row and one column.

        Returns:
            The single value ``queryResult[0][0]``.

        Raises:
            RuntimeError: if the connection is not open, or the result is not
                exactly one row by one column.
        """
        if not self.isOpen:
            raise RuntimeError("Cannot query database until connection is open")
        tSql = self._tdSql
        nRows = tSql.query(sql)
        if nRows != 1:
            raise RuntimeError("Unexpected result for query: {}, rows = {}".format(sql, nRows))
        if tSql.queryRows != 1 or tSql.queryCols != 1:
            raise RuntimeError("Unexpected result set for query: {}".format(sql))
        return tSql.queryResult[0][0]

    def queryScalar(self, sql) -> int:
        """Return the single value produced by ``sql``; no conversion is applied."""
        return self._queryAny(sql)

    def queryString(self, sql) -> str:
        """Return the single value produced by ``sql``; no conversion is applied."""
        return self._queryAny(sql)
    
class AnyState:
    """Base class describing one believed state of the database.

    Subclasses provide ``getInfo()``, a list whose first entry is the state
    value and whose remaining entries are capability flags addressed by the
    ``CAN_*`` index constants below.
    """

    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, not even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
    STATE_HAS_DATA = 3  # we have some data in the table
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    # Positions inside the list returned by getInfo()
    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
        # STATE_INVALID is -1, so names are stored shifted by one position
        stateValue = self._info[self.STATE_VAL_IDX]
        return self._stateNames[stateValue + 1]

    def getInfo(self):
        """Return the state value and capability flags; subclasses must override."""
        raise RuntimeError("Must be overriden by child classes")

    def equals(self, other):
        """Compare this state's value with an int or with another AnyState."""
        if isinstance(other, int):
            return self.getValIndex() == other
        if isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        raise RuntimeError("Unexpected comparison, type = {}".format(type(other)))

    def verifyTasksToState(self, tasks, newState):
        """Check the executed tasks against the new state; subclasses must override."""
        raise RuntimeError("Must be overriden by child classes")

    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

    def getValue(self):
        return self._info[self.STATE_VAL_IDX]

    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]

    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]

    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]

    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]

    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]

    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        """Raise RuntimeError as soon as a second successful ``cls`` task is seen."""
        alreadySucceeded = False
        for candidate in tasks:
            if isinstance(candidate, cls) and candidate.isSuccess():
                if alreadySucceeded:
                    raise RuntimeError("Unexpected more than 1 success with task: {}".format(cls))
                alreadySucceeded = True

    def assertIfExistThenSuccess(self, tasks, cls):
        """Raise RuntimeError if ``cls`` tasks exist but none of them succeeded."""
        matching = [candidate for candidate in tasks if isinstance(candidate, cls)]
        succeeded = [candidate for candidate in matching if candidate.isSuccess()]
        if matching and not succeeded:
            raise RuntimeError("Unexpected zero success for task: {}".format(cls))

    def assertNoTask(self, tasks, cls):
        """Raise CrashGenError if any task is an instance of ``cls``."""
        if any(isinstance(candidate, cls) for candidate in tasks):
            raise CrashGenError("This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))

    def assertNoSuccess(self, tasks, cls):
        """Raise RuntimeError if any ``cls`` task reports success."""
        for candidate in tasks:
            if isinstance(candidate, cls) and candidate.isSuccess():
                raise RuntimeError("Unexpected successful task: {}".format(cls))

    def hasSuccess(self, tasks, cls):
        """Return True if at least one ``cls`` task reports success."""
        return any(isinstance(candidate, cls) and candidate.isSuccess() for candidate in tasks)

    def hasTask(self, tasks, cls):
        """Return True if at least one task is an instance of ``cls``."""
        return any(isinstance(candidate, cls) for candidate in tasks)

581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600
class StateInvalid(AnyState):
    """The invalid state: every capability flag is off."""

    def getInfo(self):
        canCreateDb, canDropDb = False, False
        canCreateTable, canDropTable = False, False  # fixed table
        canAddData, canReadData = False, False  # with fixed table
        return [
            self.STATE_INVALID,
            canCreateDb, canDropDb,
            canCreateTable, canDropTable,
            canAddData, canReadData,
        ]

class StateEmpty(AnyState):
    """The empty state: the only capability flag that is on is create-DB."""

    def getInfo(self):
        canCreateDb, canDropDb = True, False
        canCreateTable, canDropTable = False, False  # fixed table
        canAddData, canReadData = False, False  # with fixed table
        return [
            self.STATE_EMPTY,
            canCreateDb, canDropDb,
            canCreateTable, canDropTable,
            canAddData, canReadData,
        ]

    def verifyTasksToState(self, tasks, newState):
        """Check create-DB results of the step; ``newState`` is not consulted."""
        # Nothing to verify unless some create-DB task succeeded...
        if not self.hasSuccess(tasks, CreateDbTask):
            return
        # ...and no drop-DB task was part of the step.
        if self.hasTask(tasks, DropDbTask):
            return
        self.assertAtMostOneSuccess(tasks, CreateDbTask)  # TODO: compare numbers
605 606 607 608 609 610 611 612 613 614 615

class StateDbOnly(AnyState):
    """State with a DB and nothing else: drop-DB and create-table are enabled."""

    def getInfo(self):
        canCreateDb, canDropDb = False, True
        canCreateTable, canDropTable = True, False
        canAddData, canReadData = False, False
        return [
            self.STATE_DB_ONLY,
            canCreateDb, canDropDb,
            canCreateTable, canDropTable,
            canAddData, canReadData,
        ]

    def verifyTasksToState(self, tasks, newState):
        """Check drop-DB and create-table results; ``newState`` is not consulted."""
        # Without any create-DB task in the step, at most one drop-DB may succeed.
        createDbAttempted = self.hasTask(tasks, CreateDbTask)
        if not createDbAttempted:
            self.assertAtMostOneSuccess(tasks, DropDbTask)
        self.assertIfExistThenSuccess(tasks, DropDbTask)
        # Nothing is asserted about add-data tasks.
        # If a table was created and nobody tried to drop it, only one
        # create attempt may have succeeded.
        if (self.hasSuccess(tasks, CreateFixedSuperTableTask)
                and not self.hasTask(tasks, DropFixedSuperTableTask)):
            self.assertAtMostOneSuccess(tasks, CreateFixedSuperTableTask)

642
class StateSuperTableOnly(AnyState):
    """State with a table but no data: drop-DB, drop-table, add and read are enabled."""

    def getInfo(self):
        canCreateDb, canDropDb = False, True
        canCreateTable, canDropTable = False, True
        canAddData, canReadData = True, True
        return [
            self.STATE_TABLE_ONLY,
            canCreateDb, canDropDb,
            canCreateTable, canDropTable,
            canAddData, canReadData,
        ]

    def verifyTasksToState(self, tasks, newState):
        """Check drop-table results of the step; ``newState`` is not consulted."""
        if not self.hasSuccess(tasks, DropFixedSuperTableTask):
            return
        # we were able to drop the table: only one drop may have succeeded
        self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask)
        # TODO: need to revamp!!
665 666 667 668 669 670 671 672 673 674 675

class StateHasData(AnyState):
    """State with data in the table: drop-DB, drop-table, add and read are enabled."""

    def getInfo(self):
        canCreateDb, canDropDb = False, True
        canCreateTable, canDropTable = False, True
        canAddData, canReadData = True, True
        return [
            self.STATE_HAS_DATA,
            canCreateDb, canDropDb,
            canCreateTable, canDropTable,
            canAddData, canReadData,
        ]

    def verifyTasksToState(self, tasks, newState):
        """Check the step's tasks against the state the DB ended up in."""
        if newState.equals(AnyState.STATE_EMPTY):
            # NOTE(review): the result of this call is discarded -- was an assertion intended?
            self.hasSuccess(tasks, DropDbTask)
            if not self.hasTask(tasks, CreateDbTask):
                self.assertAtMostOneSuccess(tasks, DropDbTask)  # TODO: dicey
            return

        if newState.equals(AnyState.STATE_DB_ONLY):  # in DB only
            if not self.hasTask(tasks, CreateDbTask):  # without a create_db task
                self.assertNoTask(tasks, DropDbTask)  # there must be no drop_db task either
            # NOTE(review): the result of this call is discarded -- was an assertion intended?
            self.hasSuccess(tasks, DropFixedSuperTableTask)
            return

        if newState.equals(AnyState.STATE_TABLE_ONLY):  # data deleted
            self.assertNoTask(tasks, DropDbTask)
            self.assertNoTask(tasks, DropFixedSuperTableTask)
            self.assertNoTask(tasks, AddFixedDataTask)
            return

        # should be STATE_HAS_DATA
        self.assertNoTask(tasks, DropDbTask)
        if not self.hasTask(tasks, CreateFixedSuperTableTask):  # if we didn't create the table
            self.assertNoTask(tasks, DropFixedSuperTableTask)  # we should not have a task that drops it
S
Steven Li 已提交
695

696

697 698 699
# State of the database as we believe it to be
class DbState():
    """State of the database as this test program believes it to be.

    Owns one DbConn, the table-number queue, the generators for unique
    ticks/ints used in inserted rows, and the current AnyState object.
    """

    def __init__(self, resetDb = True):
        """Open the DB connection and determine the starting state.

        resetDb: when true, call ``resetDb()`` on the connection before the
                 current state is probed.

        Exits the process if the connection reports 'disconnected'.
        """
        self.tableNumQueue = LinearQueue()
        self._lastTick = self.setupLastTick()  # initial date time tick
        self._lastInt  = 0  # next one is initial integer
        self._lock = threading.RLock()

        self._state = StateInvalid()  # starting state
        # Indexed by the end state's getValIndex() (see pickTaskType)
        self._stateWeights = [1, 3, 5, 10]

        self._dbConn = DbConn()
        try:
            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
        except taos.error.ProgrammingError as err:
            if err.msg == 'disconnected':  # cannot open DB connection
                print("Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
                sys.exit()
            else:
                raise
        except BaseException:  # same reach as a bare except; always re-raised
            print("[=] Unexpected exception")
            raise

        if resetDb:
            self._dbConn.resetDb()  # drop and recreate DB
        self._state = self._findCurrentState()

    def getDbConn(self):
        """Return the DbConn owned by this object."""
        return self._dbConn

    def getState(self):
        """Return the current state object."""
        return self._state

    def setupLastTick(self):
        """Compute the starting timestamp for generated rows.

        The aim is that each run can create about 100,000 records without
        repeating a timestamp of an earlier run: seconds elapsed since
        2020-06-01 are stretched by a factor of 500, wrapped into a window of
        8 * 12 * 30 days, and added to 2012-01-01 (default "keep" is 10 years).

        TODO: what if it goes beyond 10 years into the future
        TODO: fix the error as result of above: "tsdb timestamp is out of range"
        """
        t1 = datetime.datetime(2020, 6, 1)
        t2 = datetime.datetime.now()
        elSec = int(t2.timestamp() - t1.timestamp())
        # Wrap first, then stretch by 500, so the result stays inside the window
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * 500
        t3 = datetime.datetime(2012, 1, 1)
        t4 = datetime.datetime.fromtimestamp(t3.timestamp() + elSec2)
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

    def pickAndAllocateTable(self):
        """Pick any table from the queue and mark it as in use."""
        return self.tableNumQueue.pickAndAllocate()

    def addTable(self):
        """Push a new table number onto the queue and return it."""
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

    def getFixedSuperTableName(self):
        """Return the name of the fixed super table."""
        return "fs_table"

    def releaseTable(self, i):
        """Return table ``i`` to the queue so others can use it."""
        self.tableNumQueue.release(i)

    def getNextTick(self):
        """Advance the tick by one second and return it."""
        with self._lock:  # prevent duplicate tick
            self._lastTick += datetime.timedelta(0, 1)
            return self._lastTick

    def getNextInt(self):
        """Return the next value of the shared integer counter."""
        with self._lock:
            self._lastInt += 1
            return self._lastInt

    def getNextBinary(self):
        """Return a long text value ending in the next counter value."""
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(self.getNextInt())

    def getNextFloat(self):
        """Return 0.9 plus the next counter value."""
        return 0.9 + self.getNextInt()

    def getTableNameToDelete(self):
        """Pop a table number and return its table name, or False if none."""
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if not tblNum:  # maybe false
            return False
        return "table_{}".format(tblNum)

    def cleanUp(self):
        """Close the DB connection."""
        self._dbConn.close()

    # May be slow, use cautiously...
    def getTaskTypesAtState(self):
        """Return the task classes eligible at the current state.

        These are the StateTransitionTask subclasses that can begin from the
        current state, plus those that can begin from the end state of any of
        them. Each class appears at most once.

        Raises RuntimeError when no task class qualifies.
        """
        allTaskClasses = StateTransitionTask.__subclasses__()
        firstTaskTypes = [tc for tc in allTaskClasses if tc.canBeginFrom(self._state)]

        # Now add the INDIRECT ones: tasks that can start where a first task ends
        taskTypes = firstTaskTypes.copy()
        for task1 in firstTaskTypes:
            endState = task1.getEndState()
            if endState is None:  # task does not change the state
                continue
            for tc in allTaskClasses:
                # Check against everything gathered so far, so that a class
                # reachable from two end states is not listed (weighted) twice.
                if tc.canBeginFrom(endState) and (tc not in taskTypes):
                    taskTypes.append(tc)

        if not taskTypes:
            raise RuntimeError("No suitable task types found for state: {}".format(self._state))
        logger.debug("[OPS] Tasks found for state {}: {}".format(self._state, taskTypes))
        return taskTypes

    def pickTaskType(self):
        """Pick one eligible task class at random, weighted by its end state."""
        taskTypes = self.getTaskTypesAtState()
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
            if endState is not None:
                weights.append(self._stateWeights[endState.getValIndex()])  # TODO: change to a method
            else:
                weights.append(10)  # read data task, default to 10: TODO: change to a constant
        i = self._weighted_choice_sub(weights)
        return taskTypes[i]

    def _weighted_choice_sub(self, weights):
        """Return a random index into ``weights``, proportional to each weight.

        ref: https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/

        Raises ValueError when ``weights`` is empty or its total is not
        positive, instead of silently returning None.
        """
        total = sum(weights)
        if total <= 0:
            raise ValueError("weights must be non-empty with a positive total, got: {}".format(weights))
        rnd = random.random() * total  # TODO: use our dice to ensure it being deterministic?
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
        # Only reachable through floating-point rounding; pick the last entry
        return len(weights) - 1

    def _findCurrentState(self):
        """Query the server and return the state object matching what is found."""
        dbc = self._dbConn
        ts = time.time()
        if dbc.query("show databases") == 0:  # no database?!
            logger.debug("[STT] empty database found, between {} and {}".format(ts, time.time()))
            return StateEmpty()

        dbc.execute("use db")  # did not do this when opening connection
        if dbc.query("show tables") == 0:  # no tables
            logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
            return StateDbOnly()

        if dbc.query("SELECT * FROM db.{}".format(self.getFixedSuperTableName())) == 0:  # no data
            logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
            return StateSuperTableOnly()

        logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
        return StateHasData()

    def transition(self, tasks):
        """Verify the executed ``tasks`` against the new DB state and adopt it.

        Does nothing when ``tasks`` is empty (before the first step).
        """
        if len(tasks) == 0:
            return

        self._dbConn.execute("show dnodes")  # this should show up in the server log, separating steps

        # Generic checks, based on the start state. "At most one success" is
        # not asserted, since create/drop may legitimately repeat in one step.
        if self._state.canCreateDb():
            self._state.assertIfExistThenSuccess(tasks, CreateDbTask)

        if self._state.canDropDb():
            self._state.assertIfExistThenSuccess(tasks, DropDbTask)

        # No generic checks for table creation/drop or for adding/reading
        # data: the whole DB may be dropped meanwhile, so nothing is certain.

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
        self._state.verifyTasksToState(tasks, newState)  # can old state move to new state through the tasks?
        self._state = newState
906

907 908 909 910
class TaskExecutor():
    """Holds the current step number and runs tasks on worker threads."""

    def __init__(self, curStep):
        """Remember ``curStep`` as the step this executor belongs to."""
        self._curStep = curStep

    def getCurStep(self):
        """Return the step number given at construction."""
        return self._curStep

    def execute(self, task: Task, wt: WorkerThread):
        """Run ``task`` on the worker thread ``wt``."""
        task.execute(wt)
916

917 918
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
919

920 921
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
922

S
Steven Li 已提交
923
class Task():
    """Base class of all tasks executed by worker threads.

    Subclasses implement ``_executeInternal``; ``execute`` wraps it with
    logging, error capture and execution statistics.
    """

    taskSn = 100  # last serial number handed out, shared by all task classes

    @classmethod
    def allocTaskNum(cls):
        """Return the next task serial number."""
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
        return Task.taskSn

    def __init__(self, dbState: DbState, execStats: ExecutionStats):
        """Create a task bound to ``dbState`` that reports into ``execStats``."""
        self._dbState = dbState
        self._workerThread = None
        self._err = None
        self._curStep = None
        self._numRows = None  # Number of rows affected

        # Assign an incremental task serial number
        self._taskNum = self.allocTaskNum()

        self._execStats = execStats

    def isSuccess(self):
        """Return True when the last execution recorded no error."""
        return self._err is None

    def clone(self):  # TODO: why do we need this again?
        """Return a new task of the same class with the same state and stats."""
        return self.__class__(self._dbState, self._execStats)

    def logDebug(self, msg):
        """Log ``msg`` at debug level through the worker thread."""
        self._workerThread.logDebug("Step[{}.{}] {}".format(self._curStep, self._taskNum, msg))

    def logInfo(self, msg):
        """Log ``msg`` at info level through the worker thread."""
        self._workerThread.logInfo("Step[{}.{}] {}".format(self._curStep, self._taskNum, msg))

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Do the actual work; must be overridden by child classes."""
        raise RuntimeError("To be implemeted by child classes, class name: {}".format(self.__class__.__name__))

    def execute(self, wt: WorkerThread):
        """Run this task on worker thread ``wt`` and record the outcome.

        A taos.error.ProgrammingError is captured in ``self._err`` (the task
        then counts as failed); any other exception is logged and re-raised.
        """
        wt.verifyThreadSelf()
        self._workerThread = wt  # type: ignore

        te = wt.getTaskExecutor()
        self._curStep = te.getCurStep()
        taskName = self.__class__.__name__
        self.logDebug("[-] executing task {}...".format(taskName))

        self._err = None
        self._execStats.beginTaskType(taskName)  # mark beginning
        try:
            self._executeInternal(te, wt)  # TODO: no return value?
        except taos.error.ProgrammingError as err:
            # NOTE(review): errno is formatted as hex only when it is an int,
            # so an error without a numeric errno cannot break the handler.
            errno = err.errno
            errnoText = "{:X}".format(errno) if isinstance(errno, int) else str(errno)
            self.logDebug("[=] Taos library exception: errno={}, msg: {}".format(errnoText, err))
            self._err = err
        except BaseException:  # same reach as a bare except; always re-raised
            self.logDebug("[=] Unexpected exception")
            raise
        self._execStats.endTaskType(taskName, self.isSuccess())

        self.logDebug("[X] task execution completed, {}, status: {}".format(taskName, "Success" if self.isSuccess() else "Failure"))
        self._execStats.incExecCount(taskName, self.isSuccess())  # TODO: merge with above.

    def execSql(self, sql):
        """Execute ``sql`` on the DB connection owned by the DbState.

        DbState has no ``execute`` method of its own; its connection does.
        """
        return self._dbState.getDbConn().execute(sql)
986

987
                  
988
class ExecutionStats:
    """Collects per-task-class execution counts and timing for a test run."""

    def __init__(self):
        # class name -> [total executions, successful executions]
        self._execTimes: Dict[str, List[int]] = {}
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
        self._execStartTime = None
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time

        self._failed = False
        self._failureReason = None

    def startExec(self):
        """Record the wall-clock start of the run."""
        self._execStartTime = time.time()

    def endExec(self):
        """Record the elapsed wall-clock time since ``startExec``."""
        self._elapsedTime = time.time() - self._execStartTime

    def incExecCount(self, klassName, isSuccess):
        """Count one execution of ``klassName``, and one success if ``isSuccess``."""
        with self._lock:  # counters are shared by all worker threads
            counts = self._execTimes.setdefault(klassName, [0, 0])
            counts[0] += 1  # index 0 has the "total" execution times
            if isSuccess:
                counts[1] += 1  # index 1 has the "success" execution times

    def beginTaskType(self, klassName):
        """Mark a task as started; the first one in flight starts the busy clock."""
        with self._lock:
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        """Mark a task as ended; the last one in flight stops the busy clock."""
        with self._lock:
            self._tasksInProgress -= 1
            if self._tasksInProgress == 0:  # all tasks have stopped
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

    def registerFailure(self, reason):
        """Mark the run as failed with the given ``reason``."""
        self._failed = True
        self._failureReason = reason

    def logStats(self):
        """Log a summary of the run at info level."""
        logger.info("----------------------------------------------------------------------")
        logger.info("| Crash_Gen test {}, with the following stats:".
            format("FAILED (reason: {})".format(self._failureReason) if self._failed else "SUCCEEDED"))
        logger.info("| Task Execution Times (success/total):")
        execTimesAny = 0
        for k, n in self._execTimes.items():
            execTimesAny += n[0]
            logger.info("|    {0:<24}: {1}/{2}".format(k, n[1], n[0]))

        # Avoid dividing by zero when the run ended before any task executed
        avgTime = self._accRunTime / execTimesAny if execTimesAny else 0.0

        logger.info("| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info("| Total Tasks In Progress at End: {}".format(self._tasksInProgress))
        logger.info("| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(self._accRunTime))
        logger.info("| Average Per-Task Execution Time: {:.3f} seconds".format(avgTime))
        logger.info("| Total Elapsed Time (from wall clock): {:.3f} seconds".format(self._elapsedTime))
        logger.info("----------------------------------------------------------------------")
        
1049 1050 1051 1052 1053 1054 1055 1056 1057 1058


class StateTransitionTask(Task):
    """Base class for tasks that may move the database from one state to another."""

    @classmethod
    def getInfo(cls):
        """Return the task's info list; every subclass must supply its own."""
        raise RuntimeError("Overriding method expected")

    @classmethod
    def getEndState(cls):
        """Return the first entry of ``getInfo()``: the end state, or None."""
        info = cls.getInfo()
        return info[0]

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Tell whether the task may start from ``state``; must be overridden."""
        raise RuntimeError("must be overriden")

    def execute(self, wt: WorkerThread):
        """Run the task on ``wt`` by delegating to the base implementation."""
        super().execute(wt)
        


class CreateDbTask(StateTransitionTask):
    """Creates the database ``db``; its end state is StateDbOnly."""

    @classmethod
    def getInfo(cls):
        """Return the info list whose single entry is the end state."""
        endState = StateDbOnly()
        return [endState]

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """The task may begin from any state that allows creating the DB."""
        return state.canCreateDb()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Issue the CREATE DATABASE statement through the worker thread."""
        sql = "create database db"
        wt.execSql(sql)
1091

1092 1093 1094 1095
class DropDbTask(StateTransitionTask):
    """Drops the database ``db``; its end state is StateEmpty."""

    @classmethod
    def getInfo(cls):
        """Return the info list whose single entry is the end state."""
        endState = StateEmpty()
        return [endState]

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """The task may begin from any state that allows dropping the DB."""
        return state.canDropDb()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Issue the DROP DATABASE statement, then log when it completed."""
        sql = "drop database db"
        wt.execSql(sql)
        droppedAt = time.time()
        logger.debug("[OPS] database dropped at {}".format(droppedAt))
1107

1108
class CreateFixedSuperTableTask(StateTransitionTask):
    """Creates the fixed super table; its end state is StateSuperTableOnly."""

    @classmethod
    def getInfo(cls):
        """Return the info list whose single entry is the end state."""
        endState = StateSuperTableOnly()
        return [endState]

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """The task may begin from any state that allows creating the super table."""
        return state.canCreateFixedSuperTable()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Create the super table with its (ts, speed) columns and (b, f) tags."""
        superTable = self._dbState.getFixedSuperTableName()
        sql = "create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(superTable)
        wt.execSql(sql)
        # No need to create the regular tables, INSERT will do that automatically

S
Steven Li 已提交
1125

1126 1127 1128 1129
class ReadFixedDataTask(StateTransitionTask):
    """Reads from the fixed super table; does not change the DB state."""

    @classmethod
    def getInfo(cls):
        """Return the info list; a None end state means the state is unaffected."""
        return [None]

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """The task may begin from any state that allows reading data."""
        return state.canReadData()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """List the regular tables of the super table and read each of them.

        In one out of five runs the connection is closed and reopened instead,
        to simulate a broken connection.
        """
        superTable = self._dbState.getFixedSuperTableName()
        conn = wt.getDbConn()
        conn.query("select TBNAME from db.{}".format(superTable))  # TODO: analyze result set later

        if random.randrange(5) != 0:
            # Usual path: read every regular table that the query reported
            for row in conn.getQueryResult():
                conn.query("select * from db.{}".format(row[0]))  # TODO: check success failure
            return

        # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
        conn.close()
        conn.open()
1150

1151 1152
        # tdSql.query(" cars where tbname in ('carzero', 'carone')")

1153
class DropFixedSuperTableTask(StateTransitionTask):
    """Drops the fixed super table; its end state is StateDbOnly."""

    @classmethod
    def getInfo(cls):
        """Return the info list whose single entry is the end state."""
        endState = StateDbOnly()
        return [endState]

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """The task may begin from any state that allows dropping the super table."""
        return state.canDropFixedSuperTable()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Issue the DROP TABLE statement for the fixed super table."""
        superTable = self._dbState.getFixedSuperTableName()
        sql = "drop table db.{}".format(superTable)
        wt.execSql(sql)

class AddFixedDataTask(StateTransitionTask):
    """Inserts rows into regular tables of the fixed super table.

    Its end state is StateHasData.
    """

    # Track which table is being actively worked on (shared by all instances)
    activeTable : Set[int] = set()

    @classmethod
    def getInfo(cls):
        """Return the info list whose single entry is the end state."""
        endState = StateHasData()
        return [endState]

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """The task may begin from any state that allows adding data."""
        return state.canAddData()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Insert records into every regular table, visiting tables in random order.

        Table and record counts depend on ``gConfig.larger_data``. An "x" is
        printed when a table is found already marked active; the insertion
        proceeds regardless.
        """
        ds = self._dbState
        wt.execSql("use db")  # TODO: seems to be an INSERT bug to require this

        tableIds = list(range(35 if gConfig.larger_data else 2))
        random.shuffle(tableIds)
        for i in tableIds:
            if i not in self.activeTable:
                self.activeTable.add(i)  # marking it active
            else:
                # Concurrent insertion into the same table
                print("x", end="", flush=True)

            # No need to shuffle data sequence, unless later we decide to do non-increment insertion
            for j in range(50 if gConfig.larger_data else 2):  # number of records per table
                # The generators are called in this exact order: they share counters
                sql = "insert into db.reg_table_{} using {} tags ('{}', {}) values ('{}', {});".format(
                    i,
                    ds.getFixedSuperTableName(),
                    ds.getNextBinary(), ds.getNextFloat(),
                    ds.getNextTick(), ds.getNextInt())
                wt.execSql(sql)

            self.activeTable.discard(i)  # not raising an error, unlike remove
1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215


#---------- Non State-Transition Related Tasks ----------#

class CreateTableTask(Task):
    """Creates a new regular table named after a freshly added table number."""

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Add a table number, create its table, then release the number."""
        ds = self._dbState
        tIndex = ds.addTable()
        self.logDebug("Creating a table {} ...".format(tIndex))
        sql = "create table db.table_{} (ts timestamp, speed int)".format(tIndex)
        wt.execSql(sql)
        self.logDebug("Table {} created.".format(tIndex))
        ds.releaseTable(tIndex)

S
Steven Li 已提交
1216
class DropTableTask(Task):
    """Drops one regular table chosen by the DbState."""

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Ask the DbState for a table to delete and drop it, if there is one."""
        tableName = self._dbState.getTableNameToDelete()
        if tableName:
            self.logInfo("Dropping a table db.{} ...".format(tableName))
            wt.execSql("drop table db.{}".format(tableName))
            return
        # getTableNameToDelete() may return False when nothing can be dropped
        self.logInfo("Cannot generate a table to delete, skipping...")
1224
        
1225

S
Steven Li 已提交
1226 1227

class AddDataTask(Task):
    """Inserts one row into a regular table picked from the DbState's queue."""

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Allocate a table, insert one (tick, int) row, and release the table.

        The table is released even when the insert raises, so a failed insert
        does not leave it allocated.
        """
        ds = self._dbState
        self.logInfo("Adding some data... numQueue={}".format(ds.tableNumQueue.toText()))
        tIndex = ds.pickAndAllocateTable()
        if tIndex is None:
            self.logInfo("No table found to add data, skipping...")
            return

        try:
            sql = "insert into db.table_{} values ('{}', {});".format(tIndex, ds.getNextTick(), ds.getNextInt())
            self.logDebug("[SQL] Executing SQL: {}".format(sql))
            wt.execSql(sql)
        finally:
            ds.releaseTable(tIndex)  # give the table back whatever happened
        self.logDebug("[OPS] Finished adding data")
S
Steven Li 已提交
1240

1241

S
Steven Li 已提交
1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263
# Deterministic random number generator
class Dice():
    """Deterministic random number source built on the ``random`` module.

    Must be seeded exactly once before any throw.
    """

    seeded = False  # static, uninitialized

    @classmethod
    def seed(cls, s):
        """Seed the generator with ``s``; a second call raises RuntimeError."""
        if cls.seeded:
            raise RuntimeError("Cannot seed the random generator more than once")
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
    def verifyRNG(cls):
        """Check that seed 0 yields the three expected values; raise otherwise."""
        random.seed(0)
        observed = [random.randrange(0, 1000) for _ in range(3)]
        if observed != [864, 394, 776]:
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
    def throw(cls, stop):
        """Return a random integer from 0 to ``stop`` - 1."""
        return cls.throwRange(0, stop)

    @classmethod
    def throwRange(cls, start, stop):
        """Return a random integer from ``start`` to ``stop`` - 1.

        Raises RuntimeError if the dice has not been seeded yet.
        """
        if cls.seeded:
            return random.randrange(start, stop)
        raise RuntimeError("Cannot throw dice before seeding it")
S
Steven Li 已提交
1272 1273 1274


# Anyone needing to carry out work should simply come here
1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296
# class WorkDispatcher():
#     def __init__(self, dbState):
#         # self.totalNumMethods = 2
#         self.tasks = [
#             # CreateTableTask(dbState), # Obsolete
#             # DropTableTask(dbState),
#             # AddDataTask(dbState),
#         ]

#     def throwDice(self):
#         max = len(self.tasks) - 1 
#         dRes = random.randint(0, max)
#         # logger.debug("Threw the dice in range [{},{}], and got: {}".format(0,max,dRes))
#         return dRes

#     def pickTask(self):
#         dice = self.throwDice()
#         return self.tasks[dice]

#     def doWork(self, workerThread):
#         task = self.pickTask()
#         task.execute(workerThread)
S
Steven Li 已提交
1297

S
Steven Li 已提交
1298 1299
class LoggingFilter(logging.Filter):
    """Logging filter hook; currently lets every record through."""

    def filter(self, record: logging.LogRecord):
        """Return True for every record.

        Records at INFO or above are always logged. Prefix-based suppression
        of lower-level records (e.g. "[TRD]") is disabled for now.
        """
        if record.levelno < logging.INFO:
            msg = record.msg  # kept for the disabled prefix check below
            # if msg.startswith("[TRD]"):
            #     return False
        return True

        

1315
def main():
    """Command-line entry point of the crash generator.

    Parses the command-line options, configures the 'CrashGen' logger,
    builds the database state and the thread coordinator, runs the workers,
    logs the statistics and finally cleans up the database state.

    Side effects:
        Rebinds the module-level ``gConfig`` (parsed options) and ``logger``.

    Raises:
        SystemExit: After printing the help text when the script is invoked
            without any command-line argument, or when argparse rejects the
            given arguments.
    """
    global gConfig, logger

    # Argument parsing, see https://docs.python.org/3/library/argparse.html
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
    parser.add_argument('-d', '--debug', action='store_true',
                        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument('-l', '--larger-data', action='store_true',
                        help='Write larger amount of data during write operations (default: false)')
    # The flag turns per-thread connections ON; the previous help text
    # described the opposite (the shared-connection default).
    parser.add_argument('-p', '--per-thread-db-connection', action='store_true',
                        help='Use a separate db connection for each thread (default: false)')
    parser.add_argument('-s', '--max-steps', action='store', default=100, type=int,
                        help='Maximum number of steps to run (default: 100)')
    parser.add_argument('-t', '--num-threads', action='store', default=10, type=int,
                        help='Number of threads to run (default: 10)')

    gConfig = parser.parse_args()
    if len(sys.argv) == 1:
        # No option given at all: show the usage (with its prerequisites)
        # instead of silently starting a run with default settings.
        parser.print_help()
        sys.exit()

    logger = logging.getLogger('CrashGen')
    logger.addFilter(LoggingFilter())
    logger.setLevel(logging.DEBUG if gConfig.debug else logging.INFO)
    logger.addHandler(logging.StreamHandler())

    dbState = DbState()
    Dice.seed(0)  # initial seeding of dice; must happen before any throw
    tc = ThreadCoordinator(
        ThreadPool(dbState, gConfig.num_threads, gConfig.max_steps, 0),
        dbState
    )

    # TODO: re-introduce a start-up pass that reads from / writes to the
    # tables already present under the fixed super table, to exercise the
    # read-from-disk path and increase coverage. The earlier hack for this
    # (and a "show databases" sandbox loop) was disabled and has been removed.

    tc.run()
    tc.logStats()
    dbState.cleanUp()
1411 1412 1413

# Start the crash generator only when this file is executed as a script;
# importing the module must not trigger a run.
if __name__ == "__main__":
    main()