#!/usr/bin/python3.7
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
from __future__ import annotations  # For type hinting before definition, ref: https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel

import sys
import os
import traceback
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

import getopt
import argparse
import copy

import threading
import random
import time
import logging
import datetime
import textwrap

from typing import List
from typing import Dict
from typing import Set

from util.log import *
from util.dnodes import *
from util.cases import *
from util.sql import *

import crash_gen
import taos

# Global variables, tried to keep a small number.

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
gConfig = argparse.Namespace() # Dummy value, will be replaced later
logger = None  # module-wide logger; None until it is assigned elsewhere

def runThread(wt: WorkerThread):
    """Thread entry point: hand control to the given worker's ``run()`` method.

    Used as the ``target`` of the ``threading.Thread`` that each
    WorkerThread creates for itself.
    """
    worker = wt
    worker.run()
class CrashGenError(Exception):
    """Error raised by the crash generator's own consistency checks.

    Attributes:
        msg: message supplied by the raiser (may be None).
        errno: optional error number supplied by the raiser (may be None).
    """

    def __init__(self, msg=None, errno=None):
        self.msg = msg
        self.errno = errno

    def __str__(self):
        # __str__ must return a str: returning a None msg directly would make
        # str(err) / logging of the error raise TypeError instead.
        return "" if self.msg is None else str(self.msg)

class WorkerThread:
    """A worker that executes one task per step, in lock step with its peers.

    Each instance owns a ``threading.Thread`` (running ``runThread(self)``)
    and a per-thread "step gate" (a ``threading.Event``). In every step the
    worker first waits on the coordinator's shared barrier, then waits at its
    own gate until the main thread taps it, then fetches and executes a task.
    """

    def __init__(self, pool: ThreadPool, tid,
            tc: ThreadCoordinator,
            # te: TaskExecutor,
            ): # note: main thread context!
        self._pool = pool
        self._tid = tid
        self._tc = tc # type: ThreadCoordinator
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()

        # Let us have a DB connection of our own
        if ( gConfig.per_thread_db_connection ): # type: ignore
            self._dbConn = DbConn()

    def logDebug(self, msg):
        """Log ``msg`` at DEBUG level, prefixed with this worker's thread id."""
        logger.debug("    TRD[{}] {}".format(self._tid, msg))

    def logInfo(self, msg):
        """Log ``msg`` at INFO level, prefixed with this worker's thread id."""
        logger.info("    TRD[{}] {}".format(self._tid, msg))

    def getTaskExecutor(self):
        """Return the coordinator's current task executor."""
        return self._tc.getTaskExecutor()

    def start(self):
        """Start the underlying thread."""
        self._thread.start()  # AFTER the thread is recorded

    def run(self):
        """Body of the worker thread: open the DB connection, loop, clean up."""
        # initialization after thread starts, in the thread context
        logger.info("Starting to run thread: {}".format(self._tid))

        ownConnection = gConfig.per_thread_db_connection # type: ignore
        if ( ownConnection ):
            self._dbConn.open()

        try:
            self._doTaskLoop()
        finally:
            # Clean up even when the task loop raises, so the per-thread
            # connection is not left open.
            if ( ownConnection ):
                self._dbConn.close()

    def _doTaskLoop(self) :
        """Run steps until the coordinator reports it is no longer running."""
        while True:
            tc = self._tc # Thread Coordinator, the overall master
            tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
            logger.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid))
            self.crossStepGate()   # then per-thread gate, after being tapped
            logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
            if not self._tc.isRunning():
                logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
                break

            logger.debug("[TRD] Worker thread [{}] about to fetch task".format(self._tid))
            task = tc.fetchTask()
            logger.debug("[TRD] Worker thread [{}] about to execute task: {}".format(self._tid, task.__class__.__name__))
            task.execute(self)
            tc.saveExecutedTask(task)
            logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))

    def verifyThreadSelf(self): # ensure we are called by this own thread
        """Raise RuntimeError unless called from this worker's own thread."""
        if ( threading.get_ident() != self._thread.ident ):
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadMain(self): # ensure we are called by the main thread
        """Raise RuntimeError unless called from the main thread."""
        if ( threading.get_ident() != threading.main_thread().ident ):
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
        """Raise RuntimeError if the underlying thread is not alive."""
        if ( not self._thread.is_alive() ):
            raise RuntimeError("Unexpected dead thread")

    # A gate is different from a barrier in that a thread needs to be "tapped"
    def crossStepGate(self):
        """Block at this worker's gate until tapped, then re-arm the gate.

        Must be called from the worker's own thread.
        """
        self.verifyThreadAlive()
        self.verifyThreadSelf() # only allowed by ourselves

        # Wait again at the "gate", waiting to be "tapped"
        logger.debug("[TRD] Worker thread {} about to cross the step gate".format(self._tid))
        self._stepGate.wait()
        self._stepGate.clear()

    def tapStepGate(self): # give it a tap, release the thread waiting there
        """Release the worker waiting at its gate. Main thread only."""
        self.verifyThreadAlive()
        self.verifyThreadMain() # only allowed for main thread

        logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
        self._stepGate.set() # wake up!
        time.sleep(0) # let the released thread run a bit

    def execSql(self, sql): # TODO: expose DbConn directly
        """Execute ``sql`` on this worker's own connection, or the shared one."""
        if ( gConfig.per_thread_db_connection ):
            return self._dbConn.execute(sql)
        else:
            return self._tc.getDbManager().getDbConn().execute(sql)

    def getDbConn(self):
        """Return this worker's own connection, or the DB manager's shared one."""
        if ( gConfig.per_thread_db_connection ):
            return self._dbConn
        else:
            return self._tc.getDbManager().getDbConn()

class ThreadCoordinator:
    """Drives all worker threads through the run, one step at a time.

    The coordinator shares one barrier with the workers (sized for every
    worker plus the coordinating thread). After each barrier crossing it
    transitions the DB state machine using the tasks executed in the step,
    installs a fresh TaskExecutor and taps each worker's gate.
    """

    def __init__(self, pool, dbManager):
        self._curStep = -1 # first step is 0
        self._pool = pool
        self._te = None # prepare for every new step
        self._dbManager = dbManager
        self._executedTasks: List[Task] = [] # in a given step
        self._lock = threading.RLock() # sync access for a few things

        self._stepBarrier = threading.Barrier(self._pool.numThreads + 1) # one barrier for all threads
        self._execStats = ExecutionStats()

    def getTaskExecutor(self):
        """Return the current step's task executor, or None when not running."""
        return self._te

    def getDbManager(self) -> DbManager :
        """Return the DB manager given at construction."""
        return self._dbManager

    def crossStepBarrier(self):
        """Wait at the barrier shared by the workers and the coordinator."""
        self._stepBarrier.wait()

    def run(self):
        """Create the workers, run up to ``gConfig.max_steps`` steps, then join.

        A ``taos.error.ProgrammingError`` whose message is
        'network unavailable' ends the run early and is recorded as a failure;
        any other ProgrammingError propagates.
        """
        self._pool.createAndStartThreads(self)

        # Coordinate all threads step by step
        self._curStep = -1 # not started yet
        maxSteps = gConfig.max_steps # type: ignore
        self._execStats.startExec() # start the stop watch
        failed = False
        while(self._curStep < maxSteps-1 and not failed):  # maxStep==10, last curStep should be 9
            if not gConfig.debug:
                print(".", end="", flush=True) # print this only if we are not in debug mode
            logger.debug("[TRD] Main thread going to sleep")

            # Now ready to enter a step
            self.crossStepBarrier() # let other threads go past the pool barrier, but wait at the thread gate
            self._stepBarrier.reset() # Other worker threads should now be at the "gate"

            # At this point, all threads should be past the overall "barrier" and before the per-thread "gate"
            try:
                self._dbManager.getStateMachine().transition(self._executedTasks) # at end of step, transition the DB state
            except taos.error.ProgrammingError as err:
                if ( err.msg == 'network unavailable' ): # broken DB connection
                    logger.info("DB connection broken, execution failed")
                    traceback.print_stack()
                    failed = True
                    self._te = None # Not running any more
                    self._execStats.registerFailure("Broken DB Connection")
                    # Do not "continue" here: the workers still need to be
                    # tapped below so they can observe that we stopped.
                else:
                    raise

            self.resetExecutedTasks() # clear the tasks after we are done

            # Get ready for next step
            logger.debug("<-- Step {} finished".format(self._curStep))
            self._curStep += 1 # we are about to get into next step. TODO: race condition here!
            logger.debug("\r\n--> Step {} starts with main thread waking up".format(self._curStep)) # Now not all threads had time to go to sleep

            # A new TE for the new step
            if not failed: # only if not failed
                self._te = TaskExecutor(self._curStep)

            logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(self._curStep)) # Now not all threads had time to go to sleep
            self.tapAllThreads()

        logger.debug("Main thread ready to finish up...")
        if not failed: # only in regular situations
            self.crossStepBarrier() # Cross it one last time, after all threads finish
            self._stepBarrier.reset()
            logger.debug("Main thread in exclusive zone...")
            self._te = None # No more executor, time to end
            logger.debug("Main thread tapping all threads one last time...")
            self.tapAllThreads() # Let the threads run one last time

        logger.debug("Main thread joining all threads")
        self._pool.joinAll() # Get all threads to finish
        logger.info("All worker thread finished")
        self._execStats.endExec()

    def logStats(self):
        """Delegate to the execution statistics' own ``logStats()``."""
        self._execStats.logStats()

    def tapAllThreads(self): # in a deterministic manner
        """Tap every worker's gate, in an order shuffled with ``Dice``."""
        wakeSeq = []
        for i in range(self._pool.numThreads): # generate a random sequence
            if Dice.throw(2) == 1 :
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
        logger.debug("[TRD] Main thread waking up worker thread: {}".format(str(wakeSeq)))
        # TODO: set dice seed to a deterministic value
        for i in wakeSeq:
            self._pool.threadList[i].tapStepGate() # TODO: maybe a bit too deep?!
            time.sleep(0) # yield

    def isRunning(self):
        """Return True while a task executor is installed."""
        return self._te is not None

    def fetchTask(self) -> Task :
        """Create a task of a type the state machine picks for the current state.

        Raises:
            RuntimeError: if the coordinator is not running.
        """
        if ( not self.isRunning() ): # no task
            raise RuntimeError("Cannot fetch task when not running")
        taskType = self.getDbManager().getStateMachine().pickTaskType() # pick a task type for current state
        return taskType(self.getDbManager(), self._execStats) # create a task from it

    def resetExecutedTasks(self):
        """Forget the tasks recorded for the step that just ended."""
        self._executedTasks = [] # should be under single thread

    def saveExecutedTask(self, task):
        """Record ``task`` as executed in the current step (lock-protected)."""
        with self._lock:
            self._executedTasks.append(task)

# We define a class to run a number of threads in locking steps.
class ThreadPool:
    """Holds the worker threads that a ThreadCoordinator drives."""

    def __init__(self, numThreads, maxSteps):
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
        self.threadList = []

    # starting to run all the threads, in locking steps
    def createAndStartThreads(self, tc: ThreadCoordinator):
        """Create ``numThreads`` workers bound to ``tc`` and start each one."""
        for tid in range(0, self.numThreads): # Create the threads
            workerThread = WorkerThread(self, tid, tc)
            self.threadList.append(workerThread)
            workerThread.start() # start, but should block immediately before step 0

    def joinAll(self):
        """Join the underlying thread of every worker in the pool."""
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

# A queue of contiguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
class LinearQueue():
    """A range ``[firstIndex..lastIndex]`` of integers plus an "in use" set.

    ``push`` extends the range at the tail and marks the new index in use;
    ``pop`` removes the head index unless it is still in use.
    """

    def __init__(self):
        self.firstIndex = 1  # 1st ever element
        self.lastIndex = 0
        self._lock = threading.RLock() # our functions may call each other
        self.inUse = set() # the indexes that are in use right now

    def toText(self):
        """Return a printable summary of the range and the in-use set."""
        return "[{}..{}], in use: {}".format(self.firstIndex, self.lastIndex, self.inUse)

    # Push (add new element, largest) to the tail, and mark it in use
    def push(self):
        """Append the next index, mark it in use and return it."""
        with self._lock:
            self.lastIndex += 1
            self.allocate(self.lastIndex)
            return self.lastIndex

    def pop(self):
        """Remove and return the head index.

        Returns False when the queue is empty or the head index is in use.
        """
        with self._lock:
            if ( self.isEmpty() ):
                return False # TODO: None?

            index = self.firstIndex
            if ( index in self.inUse ):
                return False

            self.firstIndex += 1
            return index

    def isEmpty(self):
        """Return True when the range holds no index."""
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
        """Like ``pop()``, but return 0 when the queue is empty."""
        with self._lock:
            if (self.isEmpty()):
                return 0
            return self.pop()

    def allocate(self, i):
        """Mark index ``i`` in use.

        Raises:
            RuntimeError: if ``i`` is already in use.
        """
        with self._lock:
            if ( i in self.inUse ):
                raise RuntimeError("Cannot re-use same index in queue: {}".format(i))
            self.inUse.add(i)

    def release(self, i):
        """Mark index ``i`` as no longer in use (KeyError if it was not)."""
        with self._lock:
            self.inUse.remove(i) # KeyError possible, TODO: why?

    def size(self):
        """Return the number of indexes currently in the range."""
        return self.lastIndex + 1 - self.firstIndex

    def pickAndAllocate(self):
        """Pick a random index that is not in use, mark it in use, return it.

        Returns None when the queue is empty, or when no free index was hit
        within ``size() * 10`` random draws.
        """
        with self._lock:
            if ( self.isEmpty() ):
                return None
            # Bounded retry: give up after 10x as many draws as there are
            # indexes. The lock is held, so the size cannot change meanwhile.
            for _ in range(self.size() * 10):
                ret = Dice.throwRange(self.firstIndex, self.lastIndex+1)
                if ( ret not in self.inUse ):
                    self.allocate(ret)
                    return ret
            return None


class DbConn:
    """Thin wrapper around one ``taos`` connection, its cursor and a ``TDSql``.

    Every operation other than ``open()`` raises RuntimeError while the
    connection is not open.
    """

    def __init__(self):
        self._conn = None
        self._cursor = None
        self._tdSql = None # created in open()
        self.isOpen = False

    def open(self): # Open connection
        """Connect to the server and prepare the cursor and the TDSql helper.

        Raises:
            RuntimeError: if the connection is already open.
        """
        if ( self.isOpen ):
            raise RuntimeError("Cannot re-open an existing DB connection")

        cfgPath = "../../build/test/cfg"
        self._conn = taos.connect(host="127.0.0.1", config=cfgPath) # TODO: make configurable
        self._cursor = self._conn.cursor()

        # Get the connection/cursor ready
        self._cursor.execute('reset query cache')

        self._tdSql = TDSql()
        self._tdSql.init(self._cursor)
        self.isOpen = True

    def resetDb(self): # reset the whole database, etc.
        """Drop the database named ``db`` if it exists."""
        if ( not self.isOpen ):
            raise RuntimeError("Cannot reset database until connection is open")

        self._cursor.execute('drop database if exists db')
        logger.debug("Resetting DB, dropped database")

    def close(self):
        """Close the TDSql helper and mark the connection as closed."""
        if ( not self.isOpen ):
            raise RuntimeError("Cannot clean up database until connection is open")
        self._tdSql.close()
        self.isOpen = False

    def execute(self, sql):
        """Run ``sql`` via ``TDSql.execute`` and return its result."""
        if ( not self.isOpen ):
            raise RuntimeError("Cannot execute database commands until connection is open")
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
        logger.debug("[SQL] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
        return nRows

    def query(self, sql) :
        """Run ``sql`` via ``TDSql.query`` and return its result.

        The rows themselves are available from ``getQueryResult()``.
        """
        if ( not self.isOpen ):
            raise RuntimeError("Cannot query database until connection is open")
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
        logger.debug("[SQL] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
        return nRows

    def getQueryResult(self):
        """Return the TDSql helper's ``queryResult``."""
        return self._tdSql.queryResult

    def _queryAny(self, sql) : # actual query result, whatever its type
        """Run a query expected to yield exactly one row and one column.

        Returns:
            The single value ``queryResult[0][0]``.

        Raises:
            RuntimeError: if the connection is closed or the result is not
                exactly 1x1.
        """
        if ( not self.isOpen ):
            raise RuntimeError("Cannot query database until connection is open")
        tSql = self._tdSql
        nRows = tSql.query(sql)
        if nRows != 1 :
            raise RuntimeError("Unexpected result for query: {}, rows = {}".format(sql, nRows))
        if tSql.queryRows != 1 or tSql.queryCols != 1:
            raise RuntimeError("Unexpected result set for query: {}".format(sql))
        return tSql.queryResult[0][0]

    def queryScalar(self, sql) -> int :
        """Return the single value of a one-row, one-column query."""
        return self._queryAny(sql)

    def queryString(self, sql) -> str :
        """Return the single value of a one-row, one-column query."""
        return self._queryAny(sql)
    
class AnyState:
    """Base class for the database states the generator reasons about.

    A subclass supplies, through ``getInfo()``, a list whose slot 0 is one of
    the ``STATE_*`` values and whose remaining slots are the capability flags
    indexed by the ``CAN_*`` constants.
    """

    STATE_INVALID    = -1
    STATE_EMPTY      = 0  # nothing there, not even a DB
    STATE_DB_ONLY    = 1  # we have a DB, but nothing else
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
    STATE_HAS_DATA   = 3  # we have some data in the table
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    # Indexes into the list returned by getInfo()
    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
        # The state values start at -1 (STATE_INVALID), the name list at 0.
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]

    def getInfo(self):
        """Return the state value followed by the capability flags."""
        raise RuntimeError("Must be overriden by child classes")

    def equals(self, other):
        """Compare this state's value with an int or with another AnyState.

        Raises:
            RuntimeError: if ``other`` is neither an int nor an AnyState.
        """
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
            raise RuntimeError("Unexpected comparison, type = {}".format(type(other)))

    def verifyTasksToState(self, tasks, newState):
        """Check ``tasks`` against a move to ``newState``; subclasses implement."""
        raise RuntimeError("Must be overriden by child classes")

    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

    def getValue(self):
        return self._info[self.STATE_VAL_IDX]

    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]

    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]

    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]

    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]

    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]

    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        """Raise RuntimeError as soon as a second successful ``cls`` task is seen."""
        sCnt = 0
        for task in tasks :
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                sCnt += 1
                if ( sCnt >= 2 ):
                    raise RuntimeError("Unexpected more than 1 success with task: {}".format(cls))

    def assertIfExistThenSuccess(self, tasks, cls):
        """Raise RuntimeError if ``cls`` tasks exist but none of them succeeded."""
        sCnt = 0
        exists = False
        for task in tasks :
            if not isinstance(task, cls):
                continue
            exists = True # we have a valid instance
            if task.isSuccess():
                sCnt += 1
        if ( exists and sCnt <= 0 ):
            raise RuntimeError("Unexpected zero success for task: {}".format(cls))

    def assertNoTask(self, tasks, cls):
        """Raise CrashGenError if any task is an instance of ``cls``."""
        for task in tasks :
            if isinstance(task, cls):
                raise CrashGenError("This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))

    def assertNoSuccess(self, tasks, cls):
        """Raise RuntimeError if any ``cls`` task succeeded."""
        for task in tasks :
            if isinstance(task, cls):
                if task.isSuccess():
                    raise RuntimeError("Unexpected successful task: {}".format(cls))

    def hasSuccess(self, tasks, cls):
        """Return True if at least one ``cls`` task succeeded."""
        return any(isinstance(task, cls) and task.isSuccess() for task in tasks)

    def hasTask(self, tasks, cls):
        """Return True if at least one task is an instance of ``cls``."""
        return any(isinstance(task, cls) for task in tasks)

class StateInvalid(AnyState):
    """Placeholder state whose value is STATE_INVALID; nothing is allowed."""

    def getInfo(self):
        # Six capability flags follow the state value: create/drop DB,
        # create/drop fixed table, insert/read data. All are off here.
        noCapabilities = [False] * 6
        return [self.STATE_INVALID] + noCapabilities

    # def verifyTasksToState(self, tasks, newState):

class StateEmpty(AnyState):
    """State in which nothing exists yet; only creating a DB is allowed."""

    def getInfo(self):
        return [
            self.STATE_EMPTY,
            True, False, # can create/drop Db
            False, False, # can create/drop fixed table
            False, False, # can insert/read data with fixed table
        ]

    def verifyTasksToState(self, tasks, newState):
        """Check the tasks of a step that started from the empty state.

        If some create-DB task succeeded and no drop-DB task ran in the same
        step, at most one create-DB task may have succeeded.
        """
        createdDb = self.hasSuccess(tasks, TaskCreateDb) # at EMPTY, if there's success in creating DB
        if ( createdDb and not self.hasTask(tasks, TaskDropDb) ): # and no drop_db tasks
            self.assertAtMostOneSuccess(tasks, TaskCreateDb) # we must have at most one. TODO: compare numbers


class StateDbOnly(AnyState):
    """State in which a database exists but holds no table."""

    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True, # can create/drop Db
            True, False, # can create/drop fixed table
            False, False, # can insert/read data with fixed table
        ]

    def verifyTasksToState(self, tasks, newState):
        """Check the tasks of a step that started with a DB and nothing else.

        * Without any create-DB task in the step, at most one drop-DB task
          may have succeeded.
        * If drop-DB tasks exist, at least one of them must have succeeded.
        * If a create-super-table task succeeded and no drop-super-table task
          ran, at most one create-super-table task may have succeeded.

        Nothing is asserted about add-data tasks.
        """
        if ( not self.hasTask(tasks, TaskCreateDb) ):
            self.assertAtMostOneSuccess(tasks, TaskDropDb) # only if we don't create any more
        self.assertIfExistThenSuccess(tasks, TaskDropDb)

        if ( self.hasSuccess(tasks, TaskCreateSuperTable) ): # did not drop db, create table success
            if ( not self.hasTask(tasks, TaskDropSuperTable) ):
                self.assertAtMostOneSuccess(tasks, TaskCreateSuperTable) # at most 1 attempt is successful, if we don't drop anything

class StateSuperTableOnly(AnyState):
    """State in which the super table exists but holds no data."""

    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True, # can create/drop Db
            False, True, # can create/drop fixed table
            True, True, # can insert/read data with fixed table
        ]

    def verifyTasksToState(self, tasks, newState):
        """Check the tasks of a step that started with an empty super table.

        Only one rule is enforced: if a drop-super-table task succeeded, at
        most one such task may have succeeded.
        """
        if ( self.hasSuccess(tasks, TaskDropSuperTable) ): # we are able to drop the table
            self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
        # TODO: need to revamp!!


class StateHasData(AnyState):
    """State in which the super table exists and contains data."""

    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True, # can create/drop Db
            False, True, # can create/drop fixed table
            True, True, # can insert/read data with fixed table
        ]

    def verifyTasksToState(self, tasks, newState):
        """Check the tasks of a step that started with data, given ``newState``."""
        if ( newState.equals(AnyState.STATE_EMPTY) ):
            # NOTE(review): the result of this call is discarded, so it
            # asserts nothing; kept as-is to preserve behaviour.
            self.hasSuccess(tasks, TaskDropDb)
            if ( not self.hasTask(tasks, TaskCreateDb) ) :
                self.assertAtMostOneSuccess(tasks, TaskDropDb) # TODO: dicy
        elif ( newState.equals(AnyState.STATE_DB_ONLY) ): # in DB only
            if ( not self.hasTask(tasks, TaskCreateDb)): # without a create_db task
                self.assertNoTask(tasks, TaskDropDb) # there must be NO drop_db task
            # NOTE(review): result discarded here as well; asserts nothing.
            self.hasSuccess(tasks, TaskDropSuperTable)
        elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            self.assertNoTask(tasks, TaskDropDb)
            self.assertNoTask(tasks, TaskDropSuperTable)
            self.assertNoTask(tasks, TaskAddData)
        else: # should be STATE_HAS_DATA
            self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)) :  # if we didn't create the table
                self.assertNoTask(tasks, TaskDropSuperTable) # we should not have a task that drops it

class StateMechine :
    """Tracks the database state seen through one connection and picks task types.

    The current state is discovered by querying the server (_findCurrentState)
    when the object is created, and re-discovered after every step in
    transition().
    """

    # Weight given to task types whose getEndState() is None, i.e. tasks that
    # do not move the state machine (TaskReadData is one of them).
    _NO_STATE_CHANGE_WEIGHT = 10

    def __init__(self, dbConn):
        """Remember `dbConn` and query it once to determine the starting state."""
        self._dbConn = dbConn
        self._curState = self._findCurrentState() # starting state
        # Transition target weights, indexed by the end state's getValIndex()
        # (STATE_EMPTY, STATE_DB_ONLY, etc.)
        self._stateWeights = [1,3,5,15]

    def getCurrentState(self):
        """Return the state object found by the most recent state discovery."""
        return self._curState

    # May be slow, use cautiously...
    def getTaskTypes(self):
        """Return the task classes that can run directly or indirectly from the current state.

        "Directly" means canBeginFrom(current state) is true. "Indirectly" means
        the class can begin from the end state of one of the direct classes.
        Each class appears at most once in the result.

        Raises:
            RuntimeError: if no task class qualifies.
        """
        allTaskClasses = StateTransitionTask.__subclasses__() # all state transition tasks
        firstTaskTypes = [tc for tc in allTaskClasses if tc.canBeginFrom(self._curState)]

        # Now figure out the INDIRECT ones, reachable after one of the first tasks ran
        taskTypes = firstTaskTypes.copy() # have to have these
        for task1 in firstTaskTypes:
            endState = task1.getEndState()
            if endState is None: # does not change the state, nothing new becomes reachable
                continue
            for tc in allTaskClasses: # what task can further begin from there?
                # Test against everything gathered so far (not only the direct
                # ones); otherwise a class reachable through several first-hop
                # tasks is listed several times and gets an inflated pick weight.
                if tc.canBeginFrom(endState) and (tc not in taskTypes):
                    taskTypes.append(tc)

        if len(taskTypes) <= 0:
            raise RuntimeError("No suitable task types found for state: {}".format(self._curState))
        logger.debug("[OPS] Tasks found for state {}: {}".format(self._curState, taskTypes))
        return taskTypes

    def _findCurrentState(self):
        """Query the server and return a fresh state object describing what exists.

        Checks, in order: "show databases", "show tables" (after "use db"), and
        a SELECT on the fixed super table; the first query that returns 0
        decides the state.
        """
        dbc = self._dbConn
        ts = time.time() # used to debug how fast/slow the state-finding queries are
        if dbc.query("show databases") == 0 : # no database?!
            logger.debug("[STT] empty database found, between {} and {}".format(ts, time.time()))
            return StateEmpty()
        dbc.execute("use db") # did not do this when opening the connection
        if dbc.query("show tables") == 0 : # no tables
            logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
            return StateDbOnly()
        if dbc.query("SELECT * FROM db.{}".format(DbManager.getFixedSuperTableName()) ) == 0 : # no regular tables
            logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
            return StateSuperTableOnly()
        else: # has actual tables
            logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
            return StateHasData()

    def transition(self, tasks):
        """Verify the tasks of the finished step and move to the newly observed state.

        Does nothing when `tasks` is empty (before the first step).
        """
        if ( len(tasks) == 0 ): # before 1st step, or otherwise empty
            return # do nothing

        self._dbConn.execute("show dnodes") # this should show up in the server log, separating steps

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in case of multiple creation and drops

        if self._curState.canDropDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in case of drop-create-drop

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not really, in case of create-drop-create

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not really in case of drop-create-drop

        # if self._state.canAddData():
        #     self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true actually

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
        self._curState.verifyTasksToState(tasks, newState) # can old state move to new state through the tasks?
        self._curState = newState

    def pickTaskType(self):
        """Pick one runnable task class at random, weighted by its end state."""
        taskTypes = self.getTaskTypes() # all the task types we can choose from at the current state
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
            if endState is not None :
                weights.append(self._stateWeights[endState.getValIndex()]) # TODO: change to a method
            else:
                weights.append(self._NO_STATE_CHANGE_WEIGHT)
        i = self._weighted_choice_sub(weights)
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
        return taskTypes[i]

    def _weighted_choice_sub(self, weights): # ref: https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
        """Return a random index into `weights`, chosen proportionally to the weights.

        Raises:
            ValueError: if `weights` is empty.
        """
        if not weights:
            raise ValueError("Cannot make a weighted choice from an empty weight list")
        rnd = random.random() * sum(weights) # TODO: use our dice to ensure it being deterministic?
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
        # Only reachable when all weights are zero or through float rounding;
        # return a valid index instead of silently falling off with None.
        return len(weights) - 1
800

801
# Manager of the Database Data/Connection
802
class DbManager():
    """Owns the DB connection, the state machine and the generators of test values."""

    def __init__(self, resetDb = True):
        """Open the DB connection, optionally reset the DB, then build the state machine.

        Exits the process (after printing instructions) when the connection
        attempt fails with the 'client disconnected' message; any other
        exception is re-raised.
        """
        self.tableNumQueue = LinearQueue()
        self._lastTick = self.setupLastTick() # initial date time tick
        self._lastInt  = 0 # next one is initial integer
        self._lock = threading.RLock()

        connection = DbConn()
        self._dbConn = connection
        try:
            connection.open() # may throw taos.error.ProgrammingError: disconnected
        except taos.error.ProgrammingError as err:
            if err.msg != 'client disconnected':
                raise
            # cannot open DB connection
            print("Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
            sys.exit()
        except:
            print("[=] Unexpected exception")
            raise

        if resetDb :
            connection.resetDb() # drop and recreate DB

        # Do this only after the connection is in proper shape
        self._stateMachine = StateMechine(connection)

    def getDbConn(self):
        """Return the connection opened in __init__."""
        return self._dbConn

    def getStateMachine(self):
        """Return the state machine bound to this manager's connection."""
        return self._stateMachine

    # We aim to create a starting time tick such that each run of this test can
    # safely create 100,000 records without repeating a time stamp used by a
    # previous run started 3 minutes (180 seconds) earlier; basically the elapsed
    # wall-clock time is expanded by a factor of 500.
    # NOTE(review): the modulus below spans 8 * 12 * 30 days, while the comments
    # talk about 10 years ("keep") - confirm which one is intended.
    # TODO: what if it goes beyond 10 years into the future
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
    def setupLastTick(self):
        """Compute, log and return the datetime from which ticks start."""
        expansionFactor = 500
        windowSeconds = 8 * 12 * 30 * 24 * 60 * 60

        epoch = datetime.datetime(2020, 6, 1)
        now = datetime.datetime.now()
        elapsed = int(now.timestamp() - epoch.timestamp())
        offset = (elapsed % (windowSeconds / expansionFactor)) * expansionFactor

        base = datetime.datetime(2012, 1, 1) # default "keep" is 10 years
        firstTick = datetime.datetime.fromtimestamp(base.timestamp() + offset)
        logger.info("Setting up TICKS to start from: {}".format(firstTick))
        return firstTick

    def pickAndAllocateTable(self): # pick any table, and "use" it
        """Delegate to tableNumQueue.pickAndAllocate()."""
        return self.tableNumQueue.pickAndAllocate()

    def addTable(self):
        """Push onto the table-number queue under the lock and return the result."""
        with self._lock:
            newIndex = self.tableNumQueue.push()
        return newIndex

    @classmethod
    def getFixedSuperTableName(cls):
        """Return the name of the single super table used by the tasks."""
        return "fs_table"

    def releaseTable(self, i): # return the table back, so others can use it
        """Delegate to tableNumQueue.release(i)."""
        self.tableNumQueue.release(i)

    def getNextTick(self):
        """Advance the tick by one second under the lock and return it."""
        with self._lock: # prevent duplicate tick
            self._lastTick = self._lastTick + datetime.timedelta(seconds=1)
            return self._lastTick

    def getNextInt(self):
        """Increment the integer counter under the lock and return it."""
        with self._lock:
            self._lastInt = self._lastInt + 1
            return self._lastInt

    def getNextBinary(self):
        """Return a long fixed string suffixed with the next integer."""
        suffix = self.getNextInt()
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(suffix)

    def getNextFloat(self):
        """Return the next integer plus 0.9."""
        return 0.9 + self.getNextInt()

    def getTableNameToDelete(self):
        """Pop a table number and return its "table_N" name, or False if the pop result is falsy."""
        tblNum = self.tableNumQueue.pop() # TODO: race condition!
        # NOTE(review): a popped table number of 0 is also falsy and yields
        # False here - confirm against LinearQueue.pop().
        return "table_{}".format(tblNum) if tblNum else False

    def cleanUp(self):
        """Close the DB connection."""
        self._dbConn.close()

896 897 898 899
class TaskExecutor():
    """Carries the current step number and runs tasks on a worker thread."""

    def __init__(self, curStep):
        self._curStep = curStep

    def getCurStep(self):
        """Return the step number given at construction."""
        step = self._curStep
        return step

    def execute(self, task: Task, wt: WorkerThread): # execute a task on a thread
        """Run `task` by calling its execute() with worker thread `wt`."""
        task.execute(wt)

    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)

    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
911

S
Steven Li 已提交
912
class Task():
    """Base class of one unit of work executed by a worker thread.

    Sub classes implement _executeInternal(). execute() wraps it with logging,
    error capture for taos.error.ProgrammingError, and statistics reporting.
    """
    taskSn = 100 # last serial number handed out, shared by all task classes

    @classmethod
    def allocTaskNum(cls):
        """Return the next task serial number."""
        Task.taskSn += 1 # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn

    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
        self._dbManager = dbManager
        self._workerThread = None # set when execute() is called
        self._err = None # the ProgrammingError caught by execute(), if any
        self._curStep = None
        self._numRows = None # Number of rows affected

        # Assign an incremental task serial number
        self._taskNum = self.allocTaskNum()
        # logger.debug("Creating new task {}...".format(self._taskNum))

        self._execStats = execStats

    def isSuccess(self):
        """Return True when no error has been recorded for this task."""
        return self._err is None

    def clone(self): # TODO: why do we need this again?
        """Return a new task of the same class, sharing the DB manager and stats."""
        newTask = self.__class__(self._dbManager, self._execStats)
        return newTask

    def logDebug(self, msg):
        """Log `msg` at debug level through the worker thread, prefixed with step/task number."""
        self._workerThread.logDebug("Step[{}.{}] {}".format(self._curStep, self._taskNum, msg))

    def logInfo(self, msg):
        """Log `msg` at info level through the worker thread, prefixed with step/task number."""
        self._workerThread.logInfo("Step[{}.{}] {}".format(self._curStep, self._taskNum, msg))

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        """Do the actual work; must be overridden by sub classes."""
        raise RuntimeError("To be implemeted by child classes, class name: {}".format(self.__class__.__name__))

    def execute(self, wt: WorkerThread):
        """Run this task on worker thread `wt` and record the outcome.

        A taos.error.ProgrammingError raised by _executeInternal() is logged and
        stored (the task then counts as a failure). Any other exception is
        logged and re-raised; in that case the end-of-task statistics calls
        below are not reached.
        """
        wt.verifyThreadSelf()
        self._workerThread = wt # type: ignore

        te = wt.getTaskExecutor()
        self._curStep = te.getCurStep()
        self.logDebug("[-] executing task {}...".format(self.__class__.__name__))

        self._err = None
        self._execStats.beginTaskType(self.__class__.__name__) # mark beginning
        try:
            self._executeInternal(te, wt) # TODO: no return value?
        except taos.error.ProgrammingError as err:
            self.logDebug("[=] Taos library exception: errno={:X}, msg: {}".format(err.errno, err))
            self._err = err
        except:
            self.logDebug("[=] Unexpected exception")
            raise
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())

        self.logDebug("[X] task execution completed, {}, status: {}".format(self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess()) # TODO: merge with above.

    def execSql(self, sql):
        """Execute `sql` on the DB manager's connection and return the result.

        DbManager itself has no execute() method, so the statement is sent
        through the connection it exposes via getDbConn().
        """
        return self._dbManager.getDbConn().execute(sql)
975

976
                  
977
class ExecutionStats:
    """Collects per-task-class execution counts and timing for the final report."""

    def __init__(self):
        # class name -> [total executions, successful executions]
        self._execTimes: Dict[str, List[int]] = {}
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None # start of the current busy period, if any
        self._execStartTime = None
        self._elapsedTime = 0.0 # total elapsed time
        self._accRunTime = 0.0 # accumulated run time

        self._failed = False
        self._failureReason = None

    def startExec(self):
        """Record the wall-clock start of the whole run."""
        self._execStartTime = time.time()

    def endExec(self):
        """Record the total wall-clock time elapsed since startExec()."""
        self._elapsedTime = time.time() - self._execStartTime

    def incExecCount(self, klassName, isSuccess):
        """Count one execution of task class `klassName`, and one success if `isSuccess`."""
        with self._lock: # counters are shared, guard the read-modify-write
            counts = self._execTimes.setdefault(klassName, [0, 0])
            counts[0] += 1 # index 0 has the "total" execution times
            if isSuccess:
                counts[1] += 1 # index 1 has the "success" execution times

    def beginTaskType(self, klassName):
        """Mark that a task started; starts a busy period if none is in progress."""
        with self._lock:
            if self._tasksInProgress == 0 : # starting a new round
                self._firstTaskStartTime = time.time() # I am now the first task
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        """Mark that a task ended; closes the busy period when it was the last one running."""
        with self._lock:
            self._tasksInProgress -= 1
            if self._tasksInProgress == 0 : # all tasks have stopped
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

    def registerFailure(self, reason):
        """Flag the run as failed, remembering `reason` for the report."""
        self._failed = True
        self._failureReason = reason

    def logStats(self):
        """Write the summary report to the module logger at info level."""
        logger.info("----------------------------------------------------------------------")
        logger.info("| Crash_Gen test {}, with the following stats:".
            format("FAILED (reason: {})".format(self._failureReason) if self._failed else "SUCCEEDED"))
        logger.info("| Task Execution Times (success/total):")
        execTimesAny = 0
        for k, n in self._execTimes.items():
            execTimesAny += n[0]
            logger.info("|    {0:<24}: {1}/{2}".format(k,n[1],n[0]))

        # No task executed at all (e.g. an early failure): report 0 rather than divide by zero
        avgTaskTime = self._accRunTime / execTimesAny if execTimesAny else 0.0

        logger.info("| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info("| Total Tasks In Progress at End: {}".format(self._tasksInProgress))
        logger.info("| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(self._accRunTime))
        logger.info("| Average Per-Task Execution Time: {:.3f} seconds".format(avgTaskTime))
        logger.info("| Total Elapsed Time (from wall clock): {:.3f} seconds".format(self._elapsedTime))
        logger.info("----------------------------------------------------------------------")
        
1038 1039 1040 1041 1042 1043 1044


class StateTransitionTask(Task):
    """Base class for tasks that the state machine can pick.

    Sub classes must override getEndState() and canBeginFrom(); the versions
    here only raise RuntimeError.
    """
    _endState = None

    @classmethod
    def getInfo(cls): # each sub class should supply their own information
        """Not implemented here; always raises RuntimeError."""
        raise RuntimeError("Overriding method expected")

    @classmethod
    def getEndState(cls): # TODO: optimize by calling it fewer times
        """Return the state reached after this task; must be overridden."""
        raise RuntimeError("Overriding method expected")

    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]

    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Tell whether this task may start from `state`; must be overridden."""
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")

    def execute(self, wt: WorkerThread):
        """Run the task on `wt`; simply defers to Task.execute()."""
        super().execute(wt)
        
1066
class TaskCreateDb(StateTransitionTask):
    """Task that issues "create database db"."""

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Allowed whenever the state reports that a DB can be created."""
        return state.canCreateDb()

    @classmethod
    def getEndState(cls):
        """A fresh StateDbOnly object."""
        return StateDbOnly()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        createSql = "create database db"
        wt.execSql(createSql)
1077

1078
class TaskDropDb(StateTransitionTask):
    """Task that issues "drop database db" and logs when it was dropped."""

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Allowed whenever the state reports that the DB can be dropped."""
        return state.canDropDb()

    @classmethod
    def getEndState(cls):
        """A fresh StateEmpty object."""
        return StateEmpty()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        dropSql = "drop database db"
        wt.execSql(dropSql)
        droppedAt = time.time()
        logger.debug("[OPS] database dropped at {}".format(droppedAt))
1090

1091
class TaskCreateSuperTable(StateTransitionTask):
    """Task that creates the fixed super table in database "db"."""

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Allowed whenever the state reports that the fixed super table can be created."""
        return state.canCreateFixedSuperTable()

    @classmethod
    def getEndState(cls):
        """A fresh StateSuperTableOnly object."""
        return StateSuperTableOnly()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        superTable = self._dbManager.getFixedSuperTableName()
        createSql = "create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(superTable)
        wt.execSql(createSql)
        # No need to create the regular tables, INSERT will do that automatically

S
Steven Li 已提交
1105

1106
class TaskReadData(StateTransitionTask):
    """Task that reads from the fixed super table and its regular tables."""

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Allowed whenever the state reports that data can be read."""
        return state.canReadData()

    @classmethod
    def getEndState(cls):
        """None, meaning this task doesn't affect the state."""
        return None

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        superTable = self._dbManager.getFixedSuperTableName()
        conn = wt.getDbConn()
        conn.query("select TBNAME from db.{}".format(superTable)) # TODO: analyze result set later

        # 1 in 5 chance, simulate a broken connection.
        # TODO: break connection in all situations
        breakConnection = random.randrange(5) == 0
        if breakConnection:
            conn.close()
            conn.open()
            return

        # Otherwise read every regular table listed by the query above
        for row in conn.getQueryResult():
            conn.query("select * from db.{}".format(row[0])) # TODO: check success failure
1127

1128 1129
        # tdSql.query(" cars where tbname in ('carzero', 'carone')")

1130
class TaskDropSuperTable(StateTransitionTask):
    """Task that drops the fixed super table from database "db"."""

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Allowed whenever the state reports that the fixed super table can be dropped."""
        return state.canDropFixedSuperTable()

    @classmethod
    def getEndState(cls):
        """A fresh StateDbOnly object."""
        return StateDbOnly()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        superTable = self._dbManager.getFixedSuperTableName()
        dropSql = "drop table db.{}".format(superTable)
        wt.execSql(dropSql)

1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163
class TaskAlterTags(StateTransitionTask):
    """Task that issues one of four tag-altering statements, chosen by the dice."""

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Allowed when the super table can be dropped: if we can drop it, we can alter tags."""
        return state.canDropFixedSuperTable()

    @classmethod
    def getEndState(cls):
        """None, meaning this task doesn't affect the state."""
        return None

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        superTable = self._dbManager.getFixedSuperTableName()
        # Statement per dice value 0..2; any other value (i.e. 3) renames the tag
        sqlByDice = {
            0: "alter table db.{} add tag extraTag int",
            1: "alter table db.{} drop tag extraTag",
            2: "alter table db.{} drop tag newTag",
        }
        template = sqlByDice.get(Dice.throw(4), "alter table db.{} change tag extraTag newTag")
        wt.execSql(template.format(superTable))

1164
class TaskAddData(StateTransitionTask):
    """Task that inserts records into a set of regular tables under the fixed super table."""

    activeTable : Set[int] = set() # Track which table is being actively worked on
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

    # We use these two files to record operations to DB, useful for power-off tests
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
        """Open the two operation-log files once, if gConfig.record_ops is set."""
        if not gConfig.record_ops:
            return
        if cls.fAddLogReady is None:
            logger.info("Recording in a file operations to be performed...")
            cls.fAddLogReady = open("add_log_ready.txt", "w")
        if cls.fAddLogDone is None:
            logger.info("Recording in a file operations completed...")
            cls.fAddLogDone = open("add_log_done.txt", "w")

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        """Allowed whenever the state reports that data can be added."""
        return state.canAddData()

    @classmethod
    def getEndState(cls):
        """A fresh StateHasData object."""
        return StateHasData()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        dbm = self._dbManager
        wt.execSql("use db") # TODO: seems to be an INSERT bug to require this

        numTables = self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES
        tableOrder = list(range(numTables))
        random.shuffle(tableOrder)

        for tblIdx in tableOrder:
            if tblIdx not in self.activeTable:
                self.activeTable.add(tblIdx) # marking it active
            else: # wow already active: concurrent insertion into this table
                print("x", end="", flush=True)

            # No need to shuffle data sequence, unless later we decide to do non-increment insertion
            numRecords = self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS
            for _ in range(numRecords): # number of records per table
                nextInt = dbm.getNextInt()
                regTableName = "db.reg_table_{}".format(tblIdx)
                if gConfig.record_ops:
                    self.prepToRecordOps()
                    readyLog = self.fAddLogReady
                    readyLog.write("Ready to write {} to {}\n".format(nextInt, regTableName))
                    readyLog.flush()
                    os.fsync(readyLog)

                # Argument order matters: the value generators advance shared counters
                insertSql = "insert into {} using {} tags ('{}', {}) values ('{}', {});".format(
                    regTableName,
                    dbm.getFixedSuperTableName(),
                    dbm.getNextBinary(), dbm.getNextFloat(),
                    dbm.getNextTick(), nextInt)
                wt.execSql(insertSql)

                if gConfig.record_ops:
                    doneLog = self.fAddLogDone
                    doneLog.write("Wrote {} to {}\n".format(nextInt, regTableName))
                    doneLog.flush()
                    os.fsync(doneLog)

            self.activeTable.discard(tblIdx) # not raising an error, unlike remove
1225 1226


S
Steven Li 已提交
1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248
# Deterministic random number generator
class Dice():
    """Seed-once wrapper around the `random` module for repeatable dice throws."""

    seeded = False # static, uninitialized

    @classmethod
    def verifyRNG(cls): # Verify that the RNG is deterministic
        """Seed with 0 and check the first three draws against known values.

        Raises:
            RuntimeError: if the draws differ from the expected sequence.
        """
        random.seed(0)
        draws = [random.randrange(0, 1000) for _ in range(3)]
        if draws != [864, 394, 776]:
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
    def seed(cls, s): # static
        """Seed the random generator with `s`; allowed only once.

        Raises:
            RuntimeError: if the dice has already been seeded.
        """
        if cls.seeded:
            raise RuntimeError("Cannot seed the random generator more than once")
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
    def throwRange(cls, start, stop): # up to stop-1
        """Return a random integer in [start, stop).

        Raises:
            RuntimeError: if the dice has not been seeded yet.
        """
        if cls.seeded:
            return random.randrange(start, stop)
        raise RuntimeError("Cannot throw dice before seeding it")

    @classmethod
    def throw(cls, stop): # get 0 to stop-1
        """Return a random integer in [0, stop)."""
        return cls.throwRange(0, stop)
S
Steven Li 已提交
1257 1258 1259


# Anyone needing to carry out work should simply come here
1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281
# class WorkDispatcher():
#     def __init__(self, dbState):
#         # self.totalNumMethods = 2
#         self.tasks = [
#             # CreateTableTask(dbState), # Obsolete
#             # DropTableTask(dbState),
#             # AddDataTask(dbState),
#         ]

#     def throwDice(self):
#         max = len(self.tasks) - 1 
#         dRes = random.randint(0, max)
#         # logger.debug("Threw the dice in range [{},{}], and got: {}".format(0,max,dRes))
#         return dRes

#     def pickTask(self):
#         dice = self.throwDice()
#         return self.tasks[dice]

#     def doWork(self, workerThread):
#         task = self.pickTask()
#         task.execute(workerThread)
S
Steven Li 已提交
1282

S
Steven Li 已提交
1283 1284
class LoggingFilter(logging.Filter):
    """Log filter hook; currently lets every record through."""

    def filter(self, record: logging.LogRecord):
        """Return True for every record (message-based filtering is disabled)."""
        infoOrAbove = record.levelno >= logging.INFO
        if infoOrAbove:
            return True # info or above always log

        msg = record.msg
        # print("type = {}, value={}".format(type(msg), msg))
        # sys.exit()

        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
        return True

        

1300
def main():
    """Command-line entry point: parse options, set up logging, run the test.

    Side effects visible in this function:
      * rebinds the module-level ``gConfig`` to the parsed argparse namespace;
      * rebinds the module-level ``logger`` to the 'CrashGen' logger, with a
        LoggingFilter and a StreamHandler attached (DEBUG level with -d,
        INFO otherwise);
      * seeds ``Dice`` with 0, runs a ThreadCoordinator over a ThreadPool of
        ``num_threads`` threads / ``max_steps`` steps, then logs its stats.

    When the script is invoked with no command-line arguments at all, the
    help text is printed and the process exits via ``sys.exit()``.
    ``dbManager.cleanUp()`` is always called once the DbManager exists, even
    if the run raises.
    """
    global gConfig
    global logger

    # Super cool Python argument library: https://docs.python.org/3/library/argparse.html
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
    parser.add_argument('-d', '--debug', action='store_true',
                        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument('-l', '--larger-data', action='store_true',
                        help='Write larger amount of data during write operations (default: false)')
    # NOTE(review): the help text below describes a *shared* connection, which
    # reads as the opposite of the option name -- confirm the intended meaning.
    parser.add_argument('-p', '--per-thread-db-connection', action='store_true',
                        help='Use a single shared db connection (default: false)')
    parser.add_argument('-r', '--record-ops', action='store_true',
                        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
    parser.add_argument('-s', '--max-steps', action='store', default=100, type=int,
                        help='Maximum number of steps to run (default: 100)')
    parser.add_argument('-t', '--num-threads', action='store', default=10, type=int,
                        help='Number of threads to run (default: 10)')

    gConfig = parser.parse_args()
    if len(sys.argv) == 1:
        # No options given at all: show usage instead of running with defaults
        parser.print_help()
        sys.exit()

    logger = logging.getLogger('CrashGen')
    logger.addFilter(LoggingFilter())
    if gConfig.debug:
        logger.setLevel(logging.DEBUG)  # default seems to be INFO
    else:
        logger.setLevel(logging.INFO)
    ch = logging.StreamHandler()
    logger.addHandler(ch)

    # resetDb = False # DEBUG only
    # dbState = DbState(resetDb)  # DEBUG only!
    dbManager = DbManager()  # Regular function
    try:
        Dice.seed(0)  # initial seeding of dice
        tc = ThreadCoordinator(
            ThreadPool(gConfig.num_threads, gConfig.max_steps),
            # WorkDispatcher(dbState), # Obsolete?
            dbManager
            )

        # # Hack to exercise reading from disk, increasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
        # sTbName = dbState.getFixedSuperTableName()
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
        #     dbc.execute("use db")

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))

        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
        #                 rTbName[0],
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
        #         for rTbName in rTables : # regular tables
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
        #         logger.info("Initial READING operation is successful")
        #     except taos.error.ProgrammingError as err:
        #         logger.info("Initial WRITE/READ error: {}".format(err))

        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
        #     rows = dbc.query("show databases")
        #     print("Rows: {}, time={}".format(rows, time.time()))

        tc.run()
        tc.logStats()
    finally:
        # Always give the DbManager its chance to clean up, even when seeding,
        # coordinator construction or the run itself raises.
        dbManager.cleanUp()

    # logger.info("Crash_Gen execution finished")

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()