crash_gen.py 86.9 KB
Newer Older
S
Shuduo Sang 已提交
1
# -----!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
S
Shuduo Sang 已提交
14 15 16
# For type hinting before definition, ref:
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations
17

S
Shuduo Sang 已提交
18 19 20
from typing import Set
from typing import Dict
from typing import List
21

S
Shuduo Sang 已提交
22 23
import textwrap
import time
24
import datetime
S
Shuduo Sang 已提交
25
import random
26
import logging
S
Shuduo Sang 已提交
27 28 29 30
import threading
import copy
import argparse
import getopt
31

S
Steven Li 已提交
32
import sys
33
import os
34
import signal
35
import traceback
36 37 38
import resource
from guppy import hpy
import gc
39

40 41
from .service_manager import ServiceManager, TdeInstance
from .misc import Logging, Status, CrashGenError, Dice, Helper, Progress
42 43 44 45
from .db import DbConn, MyTDSql, DbConnNative, DbManager

import taos
import requests
46

47 48 49 50
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Shuduo Sang 已提交
51
# Global variables, tried to keep a small number.
52 53 54

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
55 56
gConfig:    argparse.Namespace 
gSvcMgr:    ServiceManager # TODO: refactor this hack, use dep injection
57
# logger:     logging.Logger
58
gContainer: Container
S
Steven Li 已提交
59

60 61
# def runThread(wt: WorkerThread):
#     wt.run()
62

63

S
Steven Li 已提交
64
class WorkerThread:
65
    def __init__(self, pool: ThreadPool, tid, tc: ThreadCoordinator,
S
Shuduo Sang 已提交
66 67 68
                 # te: TaskExecutor,
                 ):  # note: main thread context!
        # self._curStep = -1
69
        self._pool = pool
S
Shuduo Sang 已提交
70 71
        self._tid = tid
        self._tc = tc  # type: ThreadCoordinator
S
Steven Li 已提交
72
        # self.threadIdent = threading.get_ident()
73 74
        # self._thread = threading.Thread(target=runThread, args=(self,))
        self._thread = threading.Thread(target=self.run)
75
        self._stepGate = threading.Event()
S
Steven Li 已提交
76

77
        # Let us have a DB connection of our own
S
Shuduo Sang 已提交
78
        if (gConfig.per_thread_db_connection):  # type: ignore
79
            # print("connector_type = {}".format(gConfig.connector_type))
80 81 82
            tInst = gContainer.defTdeInstance
            if gConfig.connector_type == 'native':                
                self._dbConn = DbConn.createNative(tInst.getDbTarget()) 
83
            elif gConfig.connector_type == 'rest':
84
                self._dbConn = DbConn.createRest(tInst.getDbTarget()) 
85 86 87 88 89 90 91
            elif gConfig.connector_type == 'mixed':
                if Dice.throw(2) == 0: # 1/2 chance
                    self._dbConn = DbConn.createNative() 
                else:
                    self._dbConn = DbConn.createRest() 
            else:
                raise RuntimeError("Unexpected connector type: {}".format(gConfig.connector_type))
92

93
        # self._dbInUse = False  # if "use db" was executed already
94

95
    def logDebug(self, msg):
96
        Logging.debug("    TRD[{}] {}".format(self._tid, msg))
97 98

    def logInfo(self, msg):
99
        Logging.info("    TRD[{}] {}".format(self._tid, msg))
100

101 102
    # def dbInUse(self):
    #     return self._dbInUse
103

104 105 106 107
    # def useDb(self):
    #     if (not self._dbInUse):
    #         self.execSql("use db")
    #     self._dbInUse = True
108

109
    def getTaskExecutor(self):
S
Shuduo Sang 已提交
110
        return self._tc.getTaskExecutor()
111

S
Steven Li 已提交
112
    def start(self):
113
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
114

S
Shuduo Sang 已提交
115
    def run(self):
S
Steven Li 已提交
116
        # initialization after thread starts, in the thread context
117
        # self.isSleeping = False
118
        Logging.info("Starting to run thread: {}".format(self._tid))
119

S
Shuduo Sang 已提交
120
        if (gConfig.per_thread_db_connection):  # type: ignore
121
            Logging.debug("Worker thread openning database connection")
122
            self._dbConn.open()
S
Steven Li 已提交
123

S
Shuduo Sang 已提交
124 125
        self._doTaskLoop()

126
        # clean up
S
Shuduo Sang 已提交
127
        if (gConfig.per_thread_db_connection):  # type: ignore
128 129 130
            if self._dbConn.isOpen: #sometimes it is not open
                self._dbConn.close()
            else:
131
                Logging.warning("Cleaning up worker thread, dbConn already closed")
132

S
Shuduo Sang 已提交
133
    def _doTaskLoop(self):
134 135
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
S
Shuduo Sang 已提交
136 137
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
138 139 140
            try:
                tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
            except threading.BrokenBarrierError as err: # main thread timed out
141
                print("_bto", end="")
142
                Logging.debug("[TRD] Worker thread exiting due to main thread barrier time-out")
143 144
                break

145
            Logging.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid))
146
            self.crossStepGate()   # then per-thread gate, after being tapped
147
            Logging.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
148
            if not self._tc.isRunning():
149
                print("_wts", end="")
150
                Logging.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
151 152
                break

153
            # Before we fetch the task and run it, let's ensure we properly "use" the database (not needed any more)
154
            try:
155 156 157
                if (gConfig.per_thread_db_connection):  # most likely TRUE
                    if not self._dbConn.isOpen:  # might have been closed during server auto-restart
                        self._dbConn.open()
158
                # self.useDb() # might encounter exceptions. TODO: catch
159 160
            except taos.error.ProgrammingError as err:
                errno = Helper.convertErrno(err.errno)
161
                if errno in [0x383, 0x386, 0x00B, 0x014]  : # invalid database, dropping, Unable to establish connection, Database not ready
162 163 164 165 166 167
                    # ignore
                    dummy = 0
                else:
                    print("\nCaught programming error. errno=0x{:X}, msg={} ".format(errno, err.msg))
                    raise

168
            # Fetch a task from the Thread Coordinator
169
            Logging.debug( "[TRD] Worker thread [{}] about to fetch task".format(self._tid))
170
            task = tc.fetchTask()
171 172

            # Execute such a task
173
            Logging.debug("[TRD] Worker thread [{}] about to execute task: {}".format(
S
Shuduo Sang 已提交
174
                    self._tid, task.__class__.__name__))
175
            task.execute(self)
176
            tc.saveExecutedTask(task)
177
            Logging.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))
S
Shuduo Sang 已提交
178

179
            # self._dbInUse = False  # there may be changes between steps
180
        # print("_wtd", end=None) # worker thread died
181

S
Shuduo Sang 已提交
182 183
    def verifyThreadSelf(self):  # ensure we are called by this own thread
        if (threading.get_ident() != self._thread.ident):
S
Steven Li 已提交
184 185
            raise RuntimeError("Unexpectly called from other threads")

S
Shuduo Sang 已提交
186 187
    def verifyThreadMain(self):  # ensure we are called by the main thread
        if (threading.get_ident() != threading.main_thread().ident):
S
Steven Li 已提交
188 189 190
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
S
Shuduo Sang 已提交
191
        if (not self._thread.is_alive()):
S
Steven Li 已提交
192 193
            raise RuntimeError("Unexpected dead thread")

194
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
195 196
    def crossStepGate(self):
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
197 198
        self.verifyThreadSelf()  # only allowed by ourselves

199
        # Wait again at the "gate", waiting to be "tapped"
200
        Logging.debug(
S
Shuduo Sang 已提交
201 202 203
            "[TRD] Worker thread {} about to cross the step gate".format(
                self._tid))
        self._stepGate.wait()
204
        self._stepGate.clear()
S
Shuduo Sang 已提交
205

206
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
207

S
Shuduo Sang 已提交
208
    def tapStepGate(self):  # give it a tap, release the thread waiting there
209
        # self.verifyThreadAlive()
S
Shuduo Sang 已提交
210 211
        self.verifyThreadMain()  # only allowed for main thread

212
        if self._thread.is_alive():
213
            Logging.debug("[TRD] Tapping worker thread {}".format(self._tid))
214 215 216 217
            self._stepGate.set()  # wake up!
            time.sleep(0)  # let the released thread run a bit
        else:
            print("_tad", end="") # Thread already dead
218

S
Shuduo Sang 已提交
219
    def execSql(self, sql):  # TODO: expose DbConn directly
220
        return self.getDbConn().execute(sql)
221

S
Shuduo Sang 已提交
222
    def querySql(self, sql):  # TODO: expose DbConn directly
223
        return self.getDbConn().query(sql)
224 225

    def getQueryResult(self):
226
        return self.getDbConn().getQueryResult()
227

228
    def getDbConn(self) -> DbConn :
S
Shuduo Sang 已提交
229 230
        if (gConfig.per_thread_db_connection):
            return self._dbConn
231
        else:
232
            return self._tc.getDbManager().getDbConn()
233

234 235
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
S
Shuduo Sang 已提交
236
    #         return self._dbConn.query(sql)
237 238
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
239

240
# The coordinator of all worker threads, mostly running in main thread
S
Shuduo Sang 已提交
241 242


243
class ThreadCoordinator:
244
    WORKER_THREAD_TIMEOUT = 180 # one minute
245

246
    def __init__(self, pool: ThreadPool, dbManager: DbManager):
S
Shuduo Sang 已提交
247
        self._curStep = -1  # first step is 0
248
        self._pool = pool
249
        # self._wd = wd
S
Shuduo Sang 已提交
250
        self._te = None  # prepare for every new step
251
        self._dbManager = dbManager
S
Shuduo Sang 已提交
252 253
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things
S
Steven Li 已提交
254

S
Shuduo Sang 已提交
255 256
        self._stepBarrier = threading.Barrier(
            self._pool.numThreads + 1)  # one barrier for all threads
257
        self._execStats = ExecutionStats()
258
        self._runStatus = Status.STATUS_RUNNING
259
        self._initDbs()
S
Steven Li 已提交
260

261 262 263
    def getTaskExecutor(self):
        return self._te

S
Shuduo Sang 已提交
264
    def getDbManager(self) -> DbManager:
265
        return self._dbManager
266

267 268
    def crossStepBarrier(self, timeout=None):
        self._stepBarrier.wait(timeout) 
269

270
    def requestToStop(self):
271
        self._runStatus = Status.STATUS_STOPPING
272 273
        self._execStats.registerFailure("User Interruption")

274
    def _runShouldEnd(self, transitionFailed, hasAbortedTask, workerTimeout):
275 276 277
        maxSteps = gConfig.max_steps  # type: ignore
        if self._curStep >= (maxSteps - 1): # maxStep==10, last curStep should be 9
            return True
278
        if self._runStatus != Status.STATUS_RUNNING:
279 280 281 282 283
            return True
        if transitionFailed:
            return True
        if hasAbortedTask:
            return True
284 285
        if workerTimeout:
            return True
286 287 288 289 290 291 292 293 294 295 296 297 298
        return False

    def _hasAbortedTask(self): # from execution of previous step
        for task in self._executedTasks:
            if task.isAborted():
                # print("Task aborted: {}".format(task))
                # hasAbortedTask = True
                return True
        return False

    def _releaseAllWorkerThreads(self, transitionFailed):
        self._curStep += 1  # we are about to get into next step. TODO: race condition here!
        # Now not all threads had time to go to sleep
299
        Logging.debug(
300 301 302 303 304 305 306
            "--\r\n\n--> Step {} starts with main thread waking up".format(self._curStep))

        # A new TE for the new step
        self._te = None # set to empty first, to signal worker thread to stop
        if not transitionFailed:  # only if not failed
            self._te = TaskExecutor(self._curStep)

307
        Logging.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(
308 309
                self._curStep))  # Now not all threads had time to go to sleep
        # Worker threads will wake up at this point, and each execute it's own task
310
        self.tapAllThreads() # release all worker thread from their "gates"
311 312 313 314 315

    def _syncAtBarrier(self):
         # Now main thread (that's us) is ready to enter a step
        # let other threads go past the pool barrier, but wait at the
        # thread gate
316
        Logging.debug("[TRD] Main thread about to cross the barrier")
317
        self.crossStepBarrier(timeout=self.WORKER_THREAD_TIMEOUT)
318
        self._stepBarrier.reset()  # Other worker threads should now be at the "gate"
319
        Logging.debug("[TRD] Main thread finished crossing the barrier")
320 321 322 323

    def _doTransition(self):
        transitionFailed = False
        try:
324 325 326
            for x in self._dbs:
                db = x # type: Database
                sm = db.getStateMachine()
327
                Logging.debug("[STT] starting transitions for DB: {}".format(db.getName()))
328 329 330
                # at end of step, transiton the DB state
                tasksForDb = db.filterTasks(self._executedTasks)
                sm.transition(tasksForDb, self.getDbManager().getDbConn())
331
                Logging.debug("[STT] transition ended for DB: {}".format(db.getName()))
332 333

            # Due to limitation (or maybe not) of the TD Python library,
334
            # we cannot share connections across threads
335 336 337 338
            # Here we are in main thread, we cannot operate the connections created in workers
            # Moving below to task loop
            # if sm.hasDatabase():
            #     for t in self._pool.threadList:
339
            #         Logging.debug("[DB] use db for all worker threads")
340
            #         t.useDb()
341 342
                    # t.execSql("use db") # main thread executing "use
                    # db" on behalf of every worker thread
343

344 345
        except taos.error.ProgrammingError as err:
            if (err.msg == 'network unavailable'):  # broken DB connection
346
                Logging.info("DB connection broken, execution failed")
347 348 349 350 351 352 353 354
                traceback.print_stack()
                transitionFailed = True
                self._te = None  # Not running any more
                self._execStats.registerFailure("Broken DB Connection")
                # continue # don't do that, need to tap all threads at
                # end, and maybe signal them to stop
            else:
                raise
S
Steven Li 已提交
355
        # return transitionFailed # Why did we have this??!!
356 357 358

        self.resetExecutedTasks()  # clear the tasks after we are done
        # Get ready for next step
359
        Logging.debug("<-- Step {} finished, trasition failed = {}".format(self._curStep, transitionFailed))
360 361
        return transitionFailed

S
Shuduo Sang 已提交
362
    def run(self):
363
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
364 365

        # Coordinate all threads step by step
S
Shuduo Sang 已提交
366
        self._curStep = -1  # not started yet
367
        
S
Shuduo Sang 已提交
368
        self._execStats.startExec()  # start the stop watch
369 370
        transitionFailed = False
        hasAbortedTask = False
371 372
        workerTimeout = False
        while not self._runShouldEnd(transitionFailed, hasAbortedTask, workerTimeout):
373 374 375
            if not gConfig.debug: # print this only if we are not in debug mode    
                Progress.emit(Progress.STEP_BOUNDARY)            
                # print(".", end="", flush=True)
376 377 378 379 380 381 382 383
            # if (self._curStep % 2) == 0: # print memory usage once every 10 steps
            #     memUsage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
            #     print("[m:{}]".format(memUsage), end="", flush=True) # print memory usage
            # if (self._curStep % 10) == 3: 
            #     h = hpy()
            #     print("\n")        
            #     print(h.heap())
            
384
                        
385 386
            try:
                self._syncAtBarrier() # For now just cross the barrier
387
                Progress.emit(Progress.END_THREAD_STEP)
388
            except threading.BrokenBarrierError as err:
389
                Logging.info("Main loop aborted, caused by worker thread time-out")
390 391 392 393 394 395
                self._execStats.registerFailure("Aborted due to worker thread timeout")
                print("\n\nWorker Thread time-out detected, important thread info:")
                ts = ThreadStacks()
                ts.print(filterInternal=True)
                workerTimeout = True
                break
396 397

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
S
Shuduo Sang 已提交
398 399
            # We use this period to do house keeping work, when all worker
            # threads are QUIET.
400 401
            hasAbortedTask = self._hasAbortedTask() # from previous step
            if hasAbortedTask: 
402
                Logging.info("Aborted task encountered, exiting test program")
403
                self._execStats.registerFailure("Aborted Task Encountered")
404
                break # do transition only if tasks are error free
S
Shuduo Sang 已提交
405

406
            # Ending previous step
407 408 409 410
            try:
                transitionFailed = self._doTransition() # To start, we end step -1 first
            except taos.error.ProgrammingError as err:
                transitionFailed = True
411
                errno2 = Helper.convertErrno(err.errno)  # correct error scheme
S
Steven Li 已提交
412
                errMsg = "Transition failed: errno=0x{:X}, msg: {}".format(errno2, err)
413
                Logging.info(errMsg)
414
                traceback.print_exc()
S
Steven Li 已提交
415
                self._execStats.registerFailure(errMsg)
416

417
            # Then we move on to the next step
418
            Progress.emit(Progress.BEGIN_THREAD_STEP)
419
            self._releaseAllWorkerThreads(transitionFailed)                    
420

421
        if hasAbortedTask or transitionFailed : # abnormal ending, workers waiting at "gate"
422
            Logging.debug("Abnormal ending of main thraed")
423
        elif workerTimeout:
424
            Logging.debug("Abnormal ending of main thread, due to worker timeout")
425
        else: # regular ending, workers waiting at "barrier"
426
            Logging.debug("Regular ending, main thread waiting for all worker threads to stop...")
427
            self._syncAtBarrier()
428

429
        self._te = None  # No more executor, time to end
430
        Logging.debug("Main thread tapping all threads one last time...")
431
        self.tapAllThreads()  # Let the threads run one last time
432

433 434
        Logging.debug("\r\n\n--> Main thread ready to finish up...")
        Logging.debug("Main thread joining all threads")
S
Shuduo Sang 已提交
435
        self._pool.joinAll()  # Get all threads to finish
436
        Logging.info("\nAll worker threads finished")
437 438
        self._execStats.endExec()

439 440 441 442 443 444 445 446 447 448 449 450 451
    def cleanup(self): # free resources
        self._pool.cleanup()

        self._pool = None
        self._te = None  
        self._dbManager = None
        self._executedTasks = None
        self._lock = None
        self._stepBarrier = None
        self._execStats = None
        self._runStatus = None


452 453
    def printStats(self):
        self._execStats.printStats()
S
Steven Li 已提交
454

S
Steven Li 已提交
455 456 457 458 459 460
    def isFailed(self):
        return self._execStats.isFailed()

    def getExecStats(self):
        return self._execStats

S
Shuduo Sang 已提交
461
    def tapAllThreads(self):  # in a deterministic manner
S
Steven Li 已提交
462
        wakeSeq = []
S
Shuduo Sang 已提交
463 464
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
S
Steven Li 已提交
465 466 467
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
468
        Logging.debug(
S
Shuduo Sang 已提交
469 470
            "[TRD] Main thread waking up worker threads: {}".format(
                str(wakeSeq)))
471
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
472
        for i in wakeSeq:
S
Shuduo Sang 已提交
473 474 475
            # TODO: maybe a bit too deep?!
            self._pool.threadList[i].tapStepGate()
            time.sleep(0)  # yield
S
Steven Li 已提交
476

477
    def isRunning(self):
S
Shuduo Sang 已提交
478
        return self._te is not None
479

480 481 482 483 484 485
    def _initDbs(self):
        ''' Initialize multiple databases, invoked at __ini__() time '''
        self._dbs = [] # type: List[Database]
        dbc = self.getDbManager().getDbConn()
        if gConfig.max_dbs == 0:
            self._dbs.append(Database(0, dbc))
S
Steven Li 已提交
486 487
        else:            
            baseDbNumber = int(datetime.datetime.now().timestamp( # Don't use Dice/random, as they are deterministic
488
                )*333) % 888 if gConfig.dynamic_db_table_names else 0
489
            for i in range(gConfig.max_dbs):
490
                self._dbs.append(Database(baseDbNumber + i, dbc))
491 492 493 494 495 496 497 498

    def pickDatabase(self):
        idxDb = 0
        if gConfig.max_dbs != 0 :
            idxDb = Dice.throw(gConfig.max_dbs) # 0 to N-1
        db = self._dbs[idxDb] # type: Database
        return db

S
Shuduo Sang 已提交
499
    def fetchTask(self) -> Task:
500 501 502
        ''' The thread coordinator (that's us) is responsible for fetching a task
            to be executed next.
        '''
S
Shuduo Sang 已提交
503
        if (not self.isRunning()):  # no task
504
            raise RuntimeError("Cannot fetch task when not running")
505

S
Shuduo Sang 已提交
506
        # pick a task type for current state
507
        db = self.pickDatabase()
508
        taskType = db.getStateMachine().pickTaskType() # dynamic name of class
509
        return taskType(self._execStats, db)  # create a task from it
510 511

    def resetExecutedTasks(self):
S
Shuduo Sang 已提交
512
        self._executedTasks = []  # should be under single thread
513 514 515 516

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
517

518
class ThreadPool:
519
    def __init__(self, numThreads, maxSteps):
520 521 522 523
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
S
Shuduo Sang 已提交
524 525
        self.threadList = []  # type: List[WorkerThread]

526
    # starting to run all the threads, in locking steps
527
    def createAndStartThreads(self, tc: ThreadCoordinator):
S
Shuduo Sang 已提交
528 529
        for tid in range(0, self.numThreads):  # Create the threads
            workerThread = WorkerThread(self, tid, tc)
530
            self.threadList.append(workerThread)
S
Shuduo Sang 已提交
531
            workerThread.start()  # start, but should block immediately before step 0
532 533 534

    def joinAll(self):
        for workerThread in self.threadList:
535
            Logging.debug("Joining thread...")
536 537
            workerThread._thread.join()

538 539 540
    def cleanup(self):
        self.threadList = None # maybe clean up each?

541 542
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Shuduo Sang 已提交
543 544


S
Steven Li 已提交
545 546
class LinearQueue():
    def __init__(self):
547
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
548
        self.lastIndex = 0
S
Shuduo Sang 已提交
549 550
        self._lock = threading.RLock()  # our functions may call each other
        self.inUse = set()  # the indexes that are in use right now
S
Steven Li 已提交
551

552
    def toText(self):
S
Shuduo Sang 已提交
553 554
        return "[{}..{}], in use: {}".format(
            self.firstIndex, self.lastIndex, self.inUse)
555 556

    # Push (add new element, largest) to the tail, and mark it in use
S
Shuduo Sang 已提交
557
    def push(self):
558
        with self._lock:
S
Shuduo Sang 已提交
559 560
            # if ( self.isEmpty() ):
            #     self.lastIndex = self.firstIndex
561
            #     return self.firstIndex
562 563
            # Otherwise we have something
            self.lastIndex += 1
564 565
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
566
            return self.lastIndex
S
Steven Li 已提交
567 568

    def pop(self):
569
        with self._lock:
S
Shuduo Sang 已提交
570 571 572 573
            if (self.isEmpty()):
                # raise RuntimeError("Cannot pop an empty queue")
                return False  # TODO: None?

574
            index = self.firstIndex
S
Shuduo Sang 已提交
575
            if (index in self.inUse):
576 577
                return False

578 579 580 581 582 583 584
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
585
        with self._lock:
586 587 588 589
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
590
    def allocate(self, i):
591
        with self._lock:
592
            # Logging.debug("LQ allocating item {}".format(i))
S
Shuduo Sang 已提交
593 594 595
            if (i in self.inUse):
                raise RuntimeError(
                    "Cannot re-use same index in queue: {}".format(i))
596 597
            self.inUse.add(i)

S
Steven Li 已提交
598
    def release(self, i):
599
        with self._lock:
600
            # Logging.debug("LQ releasing item {}".format(i))
S
Shuduo Sang 已提交
601
            self.inUse.remove(i)  # KeyError possible, TODO: why?
602 603 604 605

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
606
    def pickAndAllocate(self):
S
Shuduo Sang 已提交
607
        if (self.isEmpty()):
608 609
            return None
        with self._lock:
S
Shuduo Sang 已提交
610
            cnt = 0  # counting the interations
611 612
            while True:
                cnt += 1
S
Shuduo Sang 已提交
613
                if (cnt > self.size() * 10):  # 10x iteration already
614 615
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
S
Shuduo Sang 已提交
616 617
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if (ret not in self.inUse):
618 619 620
                    self.allocate(ret)
                    return ret

S
Shuduo Sang 已提交
621

622
class AnyState:
S
Shuduo Sang 已提交
623 624 625
    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, no even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
626
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
S
Shuduo Sang 已提交
627
    STATE_HAS_DATA = 3  # we have some data in the table
628 629 630 631
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
632 633 634
    # For below, if we can "drop the DB", but strictly speaking 
    # only "under normal circumstances", as we may override it with the -b option
    CAN_DROP_DB = 2  
635 636
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
637 638 639 640 641 642 643
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Shuduo Sang 已提交
644 645
        # -1 hack to accomodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]
646

647 648
    # Each sub state tells us the "info", about itself, so we can determine
    # on things like canDropDB()
649 650 651
    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
652 653 654 655 656 657
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
S
Shuduo Sang 已提交
658 659 660
            raise RuntimeError(
                "Unexpected comparison, type = {}".format(
                    type(other)))
S
Steven Li 已提交
661

662 663 664
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
665 666 667
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

668 669
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
S
Shuduo Sang 已提交
670

671 672
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
S
Shuduo Sang 已提交
673

674
    def canDropDb(self):
675 676 677 678
        # If user requests to run up to a number of DBs,
        # we'd then not do drop_db operations any more
        if gConfig.max_dbs > 0 : 
            return False
679
        return self._info[self.CAN_DROP_DB]
S
Shuduo Sang 已提交
680

681 682
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
683

684 685
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
686

687 688
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
S
Shuduo Sang 已提交
689

690 691 692 693 694
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
S
Shuduo Sang 已提交
695
        for task in tasks:
696 697 698
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
699
                # task.logDebug("Task success found")
700
                sCnt += 1
S
Shuduo Sang 已提交
701 702 703
                if (sCnt >= 2):
                    raise RuntimeError(
                        "Unexpected more than 1 success with task: {}".format(cls))
704 705 706 707

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
S
Shuduo Sang 已提交
708
        for task in tasks:
709 710
            if not isinstance(task, cls):
                continue
S
Shuduo Sang 已提交
711
            exists = True  # we have a valid instance
712 713
            if task.isSuccess():
                sCnt += 1
S
Shuduo Sang 已提交
714
        if (exists and sCnt <= 0):
S
Steven Li 已提交
715 716
            raise RuntimeError("Unexpected zero success for task type: {}, from tasks: {}"
                .format(cls, tasks))
717 718

    def assertNoTask(self, tasks, cls):
S
Shuduo Sang 已提交
719
        for task in tasks:
720
            if isinstance(task, cls):
S
Shuduo Sang 已提交
721 722
                raise CrashGenError(
                    "This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))
723 724

    def assertNoSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
725
        for task in tasks:
726 727
            if isinstance(task, cls):
                if task.isSuccess():
S
Shuduo Sang 已提交
728 729
                    raise RuntimeError(
                        "Unexpected successful task: {}".format(cls))
730 731

    def hasSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
732
        for task in tasks:
733 734 735 736 737 738
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
739
    def hasTask(self, tasks, cls):
S
Shuduo Sang 已提交
740
        for task in tasks:
S
Steven Li 已提交
741 742 743 744
            if isinstance(task, cls):
                return True
        return False

S
Shuduo Sang 已提交
745

746 747 748 749
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
S
Shuduo Sang 已提交
750 751 752
            False, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
753 754 755 756
        ]

    # def verifyTasksToState(self, tasks, newState):

S
Shuduo Sang 已提交
757

758 759 760 761
class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
S
Shuduo Sang 已提交
762 763 764
            True, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
765 766
        ]

S
Shuduo Sang 已提交
767 768
    def verifyTasksToState(self, tasks, newState):
        if (self.hasSuccess(tasks, TaskCreateDb)
769
                ):  # at EMPTY, if there's succes in creating DB
S
Shuduo Sang 已提交
770 771 772 773
            if (not self.hasTask(tasks, TaskDropDb)):  # and no drop_db tasks
                # we must have at most one. TODO: compare numbers
                self.assertAtMostOneSuccess(tasks, TaskCreateDb)

774 775 776 777 778 779 780 781 782 783 784

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
785 786 787
        if (not self.hasTask(tasks, TaskCreateDb)):
            # only if we don't create any more
            self.assertAtMostOneSuccess(tasks, TaskDropDb)
788 789 790 791 792

        # TODO: restore the below, the problem exists, although unlikely in real-world
        # if (gSvcMgr!=None) and gSvcMgr.isRestarting():     
        # if (gSvcMgr == None) or (not gSvcMgr.isRestarting()) : 
        #     self.assertIfExistThenSuccess(tasks, TaskDropDb)       
793

S
Shuduo Sang 已提交
794

795
class StateSuperTableOnly(AnyState):
796 797 798 799 800 801 802 803 804
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
805
        if (self.hasSuccess(tasks, TaskDropSuperTable)
806
                ):  # we are able to drop the table
807
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
808 809
            # we must have had recreted it
            self.hasSuccess(tasks, TaskCreateSuperTable)
810

811
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
812 813
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
814
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
815 816 817
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
818
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
819 820 821
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
822

S
Shuduo Sang 已提交
823

824 825 826 827 828 829 830 831 832 833
class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
834
        if (newState.equals(AnyState.STATE_EMPTY)):
835
            self.hasSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
836 837 838 839
            if (not self.hasTask(tasks, TaskCreateDb)):
                self.assertAtMostOneSuccess(tasks, TaskDropDb)  # TODO: dicy
        elif (newState.equals(AnyState.STATE_DB_ONLY)):  # in DB only
            if (not self.hasTask(tasks, TaskCreateDb)
840
                ):  # without a create_db task
S
Shuduo Sang 已提交
841 842
                # we must have drop_db task
                self.assertNoTask(tasks, TaskDropDb)
843
            self.hasSuccess(tasks, TaskDropSuperTable)
844
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
845 846 847 848
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
849
            # self.hasSuccess(tasks, DeleteDataTasks)
S
Shuduo Sang 已提交
850 851
        else:  # should be STATE_HAS_DATA
            if (not self.hasTask(tasks, TaskCreateDb)
852
                ):  # only if we didn't create one
S
Shuduo Sang 已提交
853 854 855
                # we shouldn't have dropped it
                self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)
856
                    ):  # if we didn't create the table
S
Shuduo Sang 已提交
857 858
                # we should not have a task that drops it
                self.assertNoTask(tasks, TaskDropSuperTable)
859
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
860

S
Shuduo Sang 已提交
861

862
class StateMechine:
863 864 865
    def __init__(self, db: Database): 
        self._db = db
        # transitition target probabilities, indexed with value of STATE_EMPTY, STATE_DB_ONLY, etc.
866
        self._stateWeights = [1, 2, 10, 40]
S
Shuduo Sang 已提交
867

868 869
    def init(self, dbc: DbConn): # late initailization, don't save the dbConn
        self._curState = self._findCurrentState(dbc)  # starting state
870
        Logging.debug("Found Starting State: {}".format(self._curState))
871 872

    # TODO: seems no lnoger used, remove?
873 874 875
    def getCurrentState(self):
        return self._curState

876 877 878
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

879
    # May be slow, use cautionsly...
S
Shuduo Sang 已提交
880
    def getTaskTypes(self):  # those that can run (directly/indirectly) from the current state
881 882 883 884 885 886
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

S
Shuduo Sang 已提交
887
        allTaskClasses = StateTransitionTask.__subclasses__()  # all state transition tasks
888 889
        firstTaskTypes = []
        for tc in allTaskClasses:
S
Shuduo Sang 已提交
890
            # t = tc(self) # create task object
891 892
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
S
Shuduo Sang 已提交
893 894 895 896 897 898 899 900
        # now we have all the tasks that can begin directly from the current
        # state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy()  # have to have these
        for task1 in firstTaskTypes:  # each task type gathered so far
            endState = task1.getEndState()  # figure the end state
            if endState is None:  # does not change end state
                continue  # no use, do nothing
            for tc in allTaskClasses:  # what task can further begin from there?
901
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
S
Shuduo Sang 已提交
902
                    taskTypes.append(tc)  # gather it
903 904

        if len(taskTypes) <= 0:
S
Shuduo Sang 已提交
905 906 907
            raise RuntimeError(
                "No suitable task types found for state: {}".format(
                    self._curState))
908
        Logging.debug(
S
Shuduo Sang 已提交
909 910 911
            "[OPS] Tasks found for state {}: {}".format(
                self._curState,
                typesToStrings(taskTypes)))
912 913
        return taskTypes

914
    def _findCurrentState(self, dbc: DbConn):
S
Shuduo Sang 已提交
915
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
916 917
        dbName =self._db.getName()
        if not dbc.existsDatabase(dbName): # dbc.hasDatabases():  # no database?!
918
            Logging.debug( "[STT] empty database found, between {} and {}".format(ts, time.time()))
919
            return StateEmpty()
S
Shuduo Sang 已提交
920 921
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
922
        dbc.use(dbName)
923
        if not dbc.hasTables():  # no tables
924
            Logging.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
925
            return StateDbOnly()
926

927 928
        sTable = self._db.getFixedSuperTable()
        if sTable.hasRegTables(dbc, dbName):  # no regular tables
929
            Logging.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
930
            return StateSuperTableOnly()
S
Shuduo Sang 已提交
931
        else:  # has actual tables
932
            Logging.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
933 934
            return StateHasData()

935 936
    # We transition the system to a new state by examining the current state itself
    def transition(self, tasks, dbc: DbConn):
S
Shuduo Sang 已提交
937
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
938
            Logging.debug("[STT] Starting State: {}".format(self._curState))
S
Shuduo Sang 已提交
939
            return  # do nothing
940

S
Shuduo Sang 已提交
941
        # this should show up in the server log, separating steps
942
        dbc.execute("show dnodes")
943 944 945 946

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
947 948
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
            # case of multiple creation and drops
949 950

        if self._curState.canDropDb():
951
            if gSvcMgr == None: # only if we are running as client-only
S
Steven Li 已提交
952
                self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
953 954
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
            # case of drop-create-drop
955 956 957

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
S
Shuduo Sang 已提交
958 959
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not
            # really, in case of create-drop-create
960 961 962

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
S
Shuduo Sang 已提交
963 964
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not
            # really in case of drop-create-drop
965 966

        # if self._state.canAddData():
S
Shuduo Sang 已提交
967 968
        # self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true
        # actually
969 970 971 972

        # if self._state.canReadData():
            # Nothing for sure

973
        newState = self._findCurrentState(dbc)
974
        Logging.debug("[STT] New DB state determined: {}".format(newState))
S
Shuduo Sang 已提交
975 976
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
977 978 979
        self._curState = newState

    def pickTaskType(self):
S
Shuduo Sang 已提交
980 981
        # all the task types we can choose from at curent state
        taskTypes = self.getTaskTypes()
982 983 984
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
S
Shuduo Sang 已提交
985 986 987
            if endState is not None:
                # TODO: change to a method
                weights.append(self._stateWeights[endState.getValIndex()])
988
            else:
S
Shuduo Sang 已提交
989 990
                # read data task, default to 10: TODO: change to a constant
                weights.append(10)
991
        i = self._weighted_choice_sub(weights)
992
        # Logging.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
993 994
        return taskTypes[i]

S
Shuduo Sang 已提交
995 996 997 998 999
    # ref:
    # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
    def _weighted_choice_sub(self, weights):
        # TODO: use our dice to ensure it being determinstic?
        rnd = random.random() * sum(weights)
1000 1001 1002 1003
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
1004

1005 1006 1007 1008 1009 1010
class Database:
    ''' We use this to represent an actual TDengine database inside a service instance,
        possibly in a cluster environment.

        For now we use it to manage state transitions in that database
    '''
1011 1012 1013 1014 1015
    _clsLock = threading.Lock() # class wide lock
    _lastInt = 101  # next one is initial integer
    _lastTick = 0
    _lastLaggingTick = 0 # lagging tick, for unsequenced insersions

1016 1017 1018 1019
    def __init__(self, dbNum: int, dbc: DbConn): # TODO: remove dbc
        self._dbNum = dbNum # we assign a number to databases, for our testing purpose
        self._stateMachine = StateMechine(self)
        self._stateMachine.init(dbc)
1020
          
1021
        self._lock = threading.RLock()
S
Shuduo Sang 已提交
1022

1023 1024
    def getStateMachine(self) -> StateMechine:
        return self._stateMachine
S
Shuduo Sang 已提交
1025

1026 1027
    def getDbNum(self):
        return self._dbNum
1028

1029 1030
    def getName(self):
        return "db_{}".format(self._dbNum)
1031

1032 1033 1034 1035 1036 1037
    def filterTasks(self, inTasks: List[Task]): # Pick out those belonging to us
        outTasks = []
        for task in inTasks:
            if task.getDb().isSame(self):
                outTasks.append(task)
        return outTasks
1038

1039 1040 1041 1042 1043
    def isSame(self, other):
        return self._dbNum == other._dbNum

    def exists(self, dbc: DbConn):
        return dbc.existsDatabase(self.getName())
1044

1045 1046 1047 1048 1049 1050 1051
    @classmethod
    def getFixedSuperTableName(cls):
        return "fs_table"

    @classmethod
    def getFixedSuperTable(cls) -> TdSuperTable:
        return TdSuperTable(cls.getFixedSuperTableName())
1052 1053 1054 1055 1056 1057

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
1058
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
1059 1060
    @classmethod
    def setupLastTick(cls):
1061
        t1 = datetime.datetime(2020, 6, 1)
1062
        t2 = datetime.datetime.now()
S
Shuduo Sang 已提交
1063 1064 1065 1066
        # maybe a very large number, takes 69 years to exceed Python int range
        elSec = int(t2.timestamp() - t1.timestamp())
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * \
            500  # a number representing seconds within 10 years
1067
        # print("elSec = {}".format(elSec))
S
Shuduo Sang 已提交
1068 1069 1070
        t3 = datetime.datetime(2012, 1, 1)  # default "keep" is 10 years
        t4 = datetime.datetime.fromtimestamp(
            t3.timestamp() + elSec2)  # see explanation above
1071
        Logging.info("Setting up TICKS to start from: {}".format(t4))
1072 1073
        return t4

1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
    @classmethod
    def getNextTick(cls):        
        with cls._clsLock:  # prevent duplicate tick
            if cls._lastLaggingTick==0:
                # 10k at 1/20 chance, should be enough to avoid overlaps
                cls._lastLaggingTick = cls.setupLastTick() + datetime.timedelta(0, -10000)                 
            if cls._lastTick==0: # should be quite a bit into the future
                cls._lastTick = cls.setupLastTick()  

            if Dice.throw(20) == 0:  # 1 in 20 chance, return lagging tick
                cls._lastLaggingTick += datetime.timedelta(0, 1) # Go back in time 100 seconds
                return cls._lastLaggingTick 
S
Shuduo Sang 已提交
1086 1087
            else:  # regular
                # add one second to it
1088 1089
                cls._lastTick += datetime.timedelta(0, 1)
                return cls._lastTick
1090 1091

    def getNextInt(self):
1092 1093 1094
        with self._lock:
            self._lastInt += 1
            return self._lastInt
1095 1096

    def getNextBinary(self):
S
Shuduo Sang 已提交
1097 1098
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(
            self.getNextInt())
1099 1100

    def getNextFloat(self):
1101 1102 1103
        ret = 0.9 + self.getNextInt()
        # print("Float obtained: {}".format(ret))
        return ret
S
Shuduo Sang 已提交
1104

1105

1106
class TaskExecutor():
1107
    class BoundedList:
S
Shuduo Sang 已提交
1108
        def __init__(self, size=10):
1109 1110
            self._size = size
            self._list = []
S
Steven Li 已提交
1111
            self._lock = threading.Lock()
1112

S
Shuduo Sang 已提交
1113
        def add(self, n: int):
S
Steven Li 已提交
1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139
            with self._lock:
                if not self._list:  # empty
                    self._list.append(n)
                    return
                # now we should insert
                nItems = len(self._list)
                insPos = 0
                for i in range(nItems):
                    insPos = i
                    if n <= self._list[i]:  # smaller than this item, time to insert
                        break  # found the insertion point
                    insPos += 1  # insert to the right

                if insPos == 0:  # except for the 1st item, # TODO: elimiate first item as gating item
                    return  # do nothing

                # print("Inserting at postion {}, value: {}".format(insPos, n))
                self._list.insert(insPos, n)  # insert

                newLen = len(self._list)
                if newLen <= self._size:
                    return  # do nothing
                elif newLen == (self._size + 1):
                    del self._list[0]  # remove the first item
                else:
                    raise RuntimeError("Corrupt Bounded List")
1140 1141 1142 1143 1144 1145

        def __str__(self):
            return repr(self._list)

    _boundedList = BoundedList()

1146 1147 1148
    def __init__(self, curStep):
        self._curStep = curStep

1149 1150 1151 1152
    @classmethod
    def getBoundedList(cls):
        return cls._boundedList

1153 1154 1155
    def getCurStep(self):
        return self._curStep

S
Shuduo Sang 已提交
1156
    def execute(self, task: Task, wt: WorkerThread):  # execute a task on a thread
1157
        task.execute(wt)
1158

1159 1160 1161 1162
    def recordDataMark(self, n: int):
        # print("[{}]".format(n), end="", flush=True)
        self._boundedList.add(n)

1163
    # def logInfo(self, msg):
1164
    #     Logging.info("    T[{}.x]: ".format(self._curStep) + msg)
1165

1166
    # def logDebug(self, msg):
1167
    #     Logging.debug("    T[{}.x]: ".format(self._curStep) + msg)
1168

S
Shuduo Sang 已提交
1169

S
Steven Li 已提交
1170
class Task():
1171 1172 1173 1174
    ''' A generic "Task" to be executed. For now we decide that there is no
        need to embed a DB connection here, we use whatever the Worker Thread has
        instead. But a task is always associated with a DB
    '''
1175 1176 1177 1178
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Shuduo Sang 已提交
1179
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
1180
        # Logging.debug("Allocating taskSN: {}".format(Task.taskSn))
S
Steven Li 已提交
1181
        return Task.taskSn
1182

1183
    def __init__(self, execStats: ExecutionStats, db: Database):
S
Shuduo Sang 已提交
1184
        self._workerThread = None
1185
        self._err = None # type: Exception
1186
        self._aborted = False
1187
        self._curStep = None
S
Shuduo Sang 已提交
1188
        self._numRows = None  # Number of rows affected
1189

S
Shuduo Sang 已提交
1190
        # Assign an incremental task serial number
1191
        self._taskNum = self.allocTaskNum()
1192
        # Logging.debug("Creating new task {}...".format(self._taskNum))
1193

1194
        self._execStats = execStats
1195
        self._db = db # A task is always associated/for a specific DB
1196

1197
    def isSuccess(self):
S
Shuduo Sang 已提交
1198
        return self._err is None
1199

1200 1201 1202
    def isAborted(self):
        return self._aborted

S
Shuduo Sang 已提交
1203
    def clone(self):  # TODO: why do we need this again?
1204
        newTask = self.__class__(self._execStats, self._db)
1205 1206
        return newTask

1207 1208 1209
    def getDb(self):
        return self._db

1210
    def logDebug(self, msg):
S
Shuduo Sang 已提交
1211 1212 1213
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1214 1215

    def logInfo(self, msg):
S
Shuduo Sang 已提交
1216 1217 1218
        self._workerThread.logInfo(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1219

1220
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1221 1222 1223
        raise RuntimeError(
            "To be implemeted by child classes, class name: {}".format(
                self.__class__.__name__))
1224

1225 1226 1227
    def _isErrAcceptable(self, errno, msg):
        if errno in [
                0x05,  # TSDB_CODE_RPC_NOT_READY
1228
                0x0B,  # Unable to establish connection, more details in TD-1648
1229
                0x200, # invalid SQL, TODO: re-examine with TD-934
1230
                0x217, # "db not selected", client side defined error code
1231 1232 1233 1234
                # 0x218, # "Table does not exist" client side defined error code
                0x360, # Table already exists
                0x362, 
                # 0x369, # tag already exists
1235 1236 1237 1238 1239 1240 1241
                0x36A, 0x36B, 0x36D,
                0x381, 
                0x380, # "db not selected"
                0x383,
                0x386,  # DB is being dropped?!
                0x503,
                0x510,  # vnode not in ready state
1242
                0x14,   # db not ready, errno changed
1243
                0x600,  # Invalid table ID, why?
1244 1245 1246
                1000  # REST catch-all error
            ]: 
            return True # These are the ALWAYS-ACCEPTABLE ones
1247 1248 1249 1250 1251 1252 1253
        # This case handled below already.
        # elif (errno in [ 0x0B ]) and gConfig.auto_start_service:
        #     return True # We may get "network unavilable" when restarting service
        elif gConfig.ignore_errors: # something is specified on command line
            moreErrnos = [int(v, 0) for v in gConfig.ignore_errors.split(',')]
            if errno in moreErrnos:
                return True
1254 1255 1256
        elif errno == 0x200 : # invalid SQL, we need to div in a bit more
            if msg.find("invalid column name") != -1:
                return True 
1257 1258 1259 1260
            elif msg.find("tags number not matched") != -1: # mismatched tags after modification
                return True
            elif msg.find("duplicated column names") != -1: # also alter table tag issues
                return True
1261
        elif gSvcMgr and (not gSvcMgr.isStable()): # We are managing service, and ...
1262
            Logging.info("Ignoring error when service starting/stopping: errno = {}, msg = {}".format(errno, msg))
S
Steven Li 已提交
1263
            return True
1264 1265 1266 1267
        
        return False # Not an acceptable error


1268 1269
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
S
Shuduo Sang 已提交
1270
        self._workerThread = wt  # type: ignore
1271 1272

        te = wt.getTaskExecutor()
1273
        self._curStep = te.getCurStep()
S
Shuduo Sang 已提交
1274 1275
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))
1276

1277
        self._err = None # TODO: type hint mess up?
1278 1279
        self._execStats.beginTaskType(self.__class__.__name__)  # mark beginning
        errno2 = None
1280 1281 1282

        # Now pick a database, and stick with it for the duration of the task execution
        dbName = self._db.getName()
1283
        try:
S
Shuduo Sang 已提交
1284
            self._executeInternal(te, wt)  # TODO: no return value?
1285
        except taos.error.ProgrammingError as err:
1286
            errno2 = Helper.convertErrno(err.errno)
1287
            if (gConfig.continue_on_exception):  # user choose to continue
1288
                self.logDebug("[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1289
                        errno2, err, wt.getDbConn().getLastSql()))
1290
                self._err = err
1291 1292
            elif self._isErrAcceptable(errno2, err.__str__()):
                self.logDebug("[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1293
                        errno2, err, wt.getDbConn().getLastSql()))
1294
                print("_", end="", flush=True)
S
Shuduo Sang 已提交
1295
                self._err = err
1296
            else: # not an acceptable error
1297 1298 1299
                errMsg = "[=] Unexpected Taos library exception ({}): errno=0x{:X}, msg: {}, SQL: {}".format(
                    self.__class__.__name__,
                    errno2, err, wt.getDbConn().getLastSql())
1300
                self.logDebug(errMsg)
S
Shuduo Sang 已提交
1301
                if gConfig.debug:
1302 1303
                    # raise # so that we see full stack
                    traceback.print_exc()
1304 1305
                print(
                    "\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
1306 1307 1308 1309
                    "----------------------------\n")
                # sys.exit(-1)
                self._err = err
                self._aborted = True
S
Shuduo Sang 已提交
1310
        except Exception as e:
S
Steven Li 已提交
1311
            self.logInfo("Non-TAOS exception encountered")
S
Shuduo Sang 已提交
1312
            self._err = e
S
Steven Li 已提交
1313
            self._aborted = True
1314
            traceback.print_exc()
1315
        except BaseException as e:
1316
            self.logInfo("Python base exception encountered")
1317
            self._err = e
1318
            self._aborted = True
S
Steven Li 已提交
1319
            traceback.print_exc()
1320
        except BaseException: # TODO: what is this again??!!
S
Shuduo Sang 已提交
1321 1322
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
1323
                    wt.getDbConn().getLastSql()))
1324
            raise
1325
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
S
Shuduo Sang 已提交
1326 1327 1328 1329

        self.logDebug("[X] task execution completed, {}, status: {}".format(
            self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        # TODO: merge with above.
1330
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess(), errno2)
S
Steven Li 已提交
1331

1332
    # TODO: refactor away, just provide the dbConn
S
Shuduo Sang 已提交
1333
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1334
        """ Haha """
1335 1336
        return wt.execSql(sql)

S
Shuduo Sang 已提交
1337
    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1338 1339
        return wt.querySql(sql)

S
Shuduo Sang 已提交
1340
    def getQueryResult(self, wt: WorkerThread):  # execute an SQL on the worker thread
1341 1342 1343
        return wt.getQueryResult()


1344
class ExecutionStats:
1345
    def __init__(self):
S
Shuduo Sang 已提交
1346 1347
        # total/success times for a task
        self._execTimes: Dict[str, [int, int]] = {}
1348 1349 1350
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1351
        self._execStartTime = None
1352
        self._errors = {}
S
Shuduo Sang 已提交
1353 1354
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time
1355

1356 1357 1358
        self._failed = False
        self._failureReason = None

S
Steven Li 已提交
1359
    def __str__(self):
S
Shuduo Sang 已提交
1360 1361
        return "[ExecStats: _failed={}, _failureReason={}".format(
            self._failed, self._failureReason)
S
Steven Li 已提交
1362 1363

    def isFailed(self):
S
Shuduo Sang 已提交
1364
        return self._failed
S
Steven Li 已提交
1365

1366 1367 1368 1369 1370 1371
    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

1372
    def incExecCount(self, klassName, isSuccess, eno=None):  # TODO: add a lock here
1373 1374
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
S
Shuduo Sang 已提交
1375 1376
        t = self._execTimes[klassName]  # tuple for the data
        t[0] += 1  # index 0 has the "total" execution times
1377
        if isSuccess:
S
Shuduo Sang 已提交
1378
            t[1] += 1  # index 1 has the "success" execution times
1379 1380 1381 1382 1383
        if eno != None:             
            if klassName not in self._errors:
                self._errors[klassName] = {}
            errors = self._errors[klassName]
            errors[eno] = errors[eno]+1 if eno in errors else 1
1384 1385 1386

    def beginTaskType(self, klassName):
        with self._lock:
S
Shuduo Sang 已提交
1387 1388
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
1389 1390 1391 1392 1393
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
S
Shuduo Sang 已提交
1394
            if self._tasksInProgress == 0:  # all tasks have stopped
1395 1396 1397
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1398 1399 1400 1401
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1402
    def printStats(self):
1403
        Logging.info(
S
Shuduo Sang 已提交
1404
            "----------------------------------------------------------------------")
1405
        Logging.info(
S
Shuduo Sang 已提交
1406 1407 1408
            "| Crash_Gen test {}, with the following stats:". format(
                "FAILED (reason: {})".format(
                    self._failureReason) if self._failed else "SUCCEEDED"))
1409
        Logging.info("| Task Execution Times (success/total):")
1410
        execTimesAny = 0.0
S
Shuduo Sang 已提交
1411
        for k, n in self._execTimes.items():
1412
            execTimesAny += n[0]
1413 1414 1415 1416 1417 1418 1419
            errStr = None
            if k in self._errors:
                errors = self._errors[k]
                # print("errors = {}".format(errors))
                errStrs = ["0x{:X}:{}".format(eno, n) for (eno, n) in errors.items()]
                # print("error strings = {}".format(errStrs))
                errStr = ", ".join(errStrs) 
1420
            Logging.info("|    {0:<24}: {1}/{2} (Errors: {3})".format(k, n[1], n[0], errStr))
S
Shuduo Sang 已提交
1421

1422
        Logging.info(
S
Shuduo Sang 已提交
1423
            "| Total Tasks Executed (success or not): {} ".format(execTimesAny))
1424
        Logging.info(
S
Shuduo Sang 已提交
1425 1426
            "| Total Tasks In Progress at End: {}".format(
                self._tasksInProgress))
1427
        Logging.info(
S
Shuduo Sang 已提交
1428 1429
            "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(
                self._accRunTime))
1430
        Logging.info(
S
Shuduo Sang 已提交
1431
            "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny))
1432
        Logging.info(
S
Shuduo Sang 已提交
1433 1434
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
1435 1436 1437
        Logging.info("| Top numbers written: {}".format(TaskExecutor.getBoundedList()))
        Logging.info("| Active DB Native Connections (now): {}".format(DbConnNative.totalConnections))
        Logging.info("| Longest native query time: {:.3f} seconds, started: {}".
1438 1439
            format(MyTDSql.longestQueryTime, 
                time.strftime("%x %X", time.localtime(MyTDSql.lqStartTime))) )
1440 1441
        Logging.info("| Longest native query: {}".format(MyTDSql.longestQuery))
        Logging.info(
S
Shuduo Sang 已提交
1442
            "----------------------------------------------------------------------")
1443 1444 1445


class StateTransitionTask(Task):
1446 1447 1448 1449 1450
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1451 1452 1453 1454
    _baseTableNumber = None

    _endState = None

1455
    @classmethod
S
Shuduo Sang 已提交
1456
    def getInfo(cls):  # each sub class should supply their own information
1457
        raise RuntimeError("Overriding method expected")
1458
    
1459
    @classmethod
S
Shuduo Sang 已提交
1460
    def getEndState(cls):  # TODO: optimize by calling it fewer times
1461 1462
        raise RuntimeError("Overriding method expected")

1463 1464 1465
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1466

1467 1468 1469
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1470 1471

    @classmethod
1472 1473 1474
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1475

1476 1477
    @classmethod
    def getRegTableName(cls, i):
1478
        if ( StateTransitionTask._baseTableNumber is None):
S
Steven Li 已提交
1479 1480
            StateTransitionTask._baseTableNumber = Dice.throw(
                999) if gConfig.dynamic_db_table_names else 0
1481
        return "reg_table_{}".format(StateTransitionTask._baseTableNumber + i)
1482

1483 1484
    def execute(self, wt: WorkerThread):
        super().execute(wt)
S
Shuduo Sang 已提交
1485 1486


1487
class TaskCreateDb(StateTransitionTask):
1488
    @classmethod
1489
    def getEndState(cls):
S
Shuduo Sang 已提交
1490
        return StateDbOnly()
1491

1492 1493 1494 1495
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1496
    # Actually creating the database(es)
1497
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1498
        # was: self.execWtSql(wt, "create database db")
1499 1500 1501 1502 1503 1504
        repStr = ""
        if gConfig.max_replicas != 1:
            numReplica = Dice.throw(gConfig.max_replicas) + 1 # 1,2 ... N
            repStr = "replica {}".format(numReplica)
        self.execWtSql(wt, "create database {} {}"
            .format(self._db.getName(), repStr) )
1505

1506
class TaskDropDb(StateTransitionTask):
1507
    @classmethod
1508 1509
    def getEndState(cls):
        return StateEmpty()
1510

1511 1512 1513 1514
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1515
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1516
        self.execWtSql(wt, "drop database {}".format(self._db.getName()))
1517
        Logging.debug("[OPS] database dropped at {}".format(time.time()))
1518

1519
class TaskCreateSuperTable(StateTransitionTask):
1520
    @classmethod
1521 1522
    def getEndState(cls):
        return StateSuperTableOnly()
1523

1524 1525
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1526
        return state.canCreateFixedSuperTable()
1527

1528
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1529
        if not self._db.exists(wt.getDbConn()):
1530
            Logging.debug("Skipping task, no DB yet")
1531 1532
            return

1533
        sTable = self._db.getFixedSuperTable() # type: TdSuperTable
1534
        # wt.execSql("use db")    # should always be in place
1535 1536
        sTable.create(wt.getDbConn(), self._db.getName(), 
            {'ts':'timestamp', 'speed':'int'}, {'b':'binary(200)', 'f':'float'})
1537
        # self.execWtSql(wt,"create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
S
Shuduo Sang 已提交
1538 1539
        # No need to create the regular tables, INSERT will do that
        # automatically
1540

S
Steven Li 已提交
1541

1542 1543 1544 1545
class TdSuperTable:
    def __init__(self, stName):
        self._stName = stName

1546 1547 1548
    def getName(self):
        return self._stName

1549 1550 1551 1552 1553
    # TODO: odd semantic, create() method is usually static?
    def create(self, dbc, dbName, cols: dict, tags: dict):
        '''Creating a super table'''
        sql = "CREATE TABLE {}.{} ({}) TAGS ({})".format(
            dbName,
1554 1555 1556 1557 1558 1559
            self._stName,
            ",".join(['%s %s'%(k,v) for (k,v) in cols.items()]),
            ",".join(['%s %s'%(k,v) for (k,v) in tags.items()])
            )
        dbc.execute(sql)        

1560
    def getRegTables(self, dbc: DbConn, dbName: str):
1561
        try:
1562
            dbc.query("select TBNAME from {}.{}".format(dbName, self._stName))  # TODO: analyze result set later            
1563
        except taos.error.ProgrammingError as err:                    
1564
            errno2 = Helper.convertErrno(err.errno) 
1565
            Logging.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err))
1566 1567 1568 1569 1570
            raise

        qr = dbc.getQueryResult()
        return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation

1571 1572
    def hasRegTables(self, dbc: DbConn, dbName: str):
        return dbc.query("SELECT * FROM {}.{}".format(dbName, self._stName)) > 0
1573

1574 1575
    def ensureTable(self, dbc: DbConn, dbName: str, regTableName: str):
        sql = "select tbname from {}.{} where tbname in ('{}')".format(dbName, self._stName, regTableName)
1576 1577
        if dbc.query(sql) >= 1 : # reg table exists already
            return
1578 1579
        sql = "CREATE TABLE {}.{} USING {}.{} tags ({})".format(
            dbName, regTableName, dbName, self._stName, self._getTagStrForSql(dbc, dbName)
1580 1581 1582
        )
        dbc.execute(sql)

1583 1584
    def _getTagStrForSql(self, dbc, dbName: str) :
        tags = self._getTags(dbc, dbName)
1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597
        tagStrs = []
        for tagName in tags: 
            tagType = tags[tagName]
            if tagType == 'BINARY':
                tagStrs.append("'Beijing-Shanghai-LosAngeles'")
            elif tagType == 'FLOAT':
                tagStrs.append('9.9')
            elif tagType == 'INT':
                tagStrs.append('88')
            else:
                raise RuntimeError("Unexpected tag type: {}".format(tagType))
        return ", ".join(tagStrs)

1598 1599
    def _getTags(self, dbc, dbName) -> dict:
        dbc.query("DESCRIBE {}.{}".format(dbName, self._stName))
1600 1601 1602 1603 1604 1605
        stCols = dbc.getQueryResult()
        # print(stCols)
        ret = {row[0]:row[1] for row in stCols if row[3]=='TAG'} # name:type
        # print("Tags retrieved: {}".format(ret))
        return ret

1606 1607
    def addTag(self, dbc, dbName, tagName, tagType):
        if tagName in self._getTags(dbc, dbName): # already 
1608 1609
            return
        # sTable.addTag("extraTag", "int")
1610
        sql = "alter table {}.{} add tag {} {}".format(dbName, self._stName, tagName, tagType)
1611 1612
        dbc.execute(sql)

1613 1614
    def dropTag(self, dbc, dbName, tagName):
        if not tagName in self._getTags(dbc, dbName): # don't have this tag
1615
            return
1616
        sql = "alter table {}.{} drop tag {}".format(dbName, self._stName, tagName)
1617 1618
        dbc.execute(sql)

1619 1620
    def changeTag(self, dbc, dbName, oldTag, newTag):
        tags = self._getTags(dbc, dbName)
1621 1622 1623 1624
        if not oldTag in tags: # don't have this tag
            return
        if newTag in tags: # already have this tag
            return
1625
        sql = "alter table {}.{} change tag {} {}".format(dbName, self._stName, oldTag, newTag)
1626 1627
        dbc.execute(sql)

1628
class TaskReadData(StateTransitionTask):
1629
    @classmethod
1630
    def getEndState(cls):
S
Shuduo Sang 已提交
1631
        return None  # meaning doesn't affect state
1632

1633 1634 1635 1636
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1637
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1638
        sTable = self._db.getFixedSuperTable()
1639

1640 1641
        # 1 in 5 chance, simulate a broken connection. 
        if random.randrange(5) == 0:  # TODO: break connection in all situations
1642 1643
            wt.getDbConn().close()
            wt.getDbConn().open()
1644
            print("_r", end="", flush=True)
1645
        
1646
        dbc = wt.getDbConn()
1647 1648
        dbName = self._db.getName()
        for rTbName in sTable.getRegTables(dbc, dbName):  # regular tables
1649
            aggExpr = Dice.choice([
1650 1651 1652
                '*',
                'count(*)',
                'avg(speed)',
1653
                # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable
1654 1655
                'sum(speed)', 
                'stddev(speed)', 
1656
                # SELECTOR functions
1657 1658 1659
                'min(speed)', 
                'max(speed)', 
                'first(speed)', 
1660
                'last(speed)',
1661 1662 1663
                'top(speed, 50)', # TODO: not supported?
                'bottom(speed, 50)', # TODO: not supported?
                'apercentile(speed, 10)', # TODO: TD-1316
1664 1665 1666 1667 1668
                'last_row(speed)',
                # Transformation Functions
                # 'diff(speed)', # TODO: no supported?!
                'spread(speed)'
                ]) # TODO: add more from 'top'
1669 1670 1671
            filterExpr = Dice.choice([ # TODO: add various kind of WHERE conditions
                None
            ])
1672
            try:
1673
                # Run the query against the regular table first
1674
                dbc.execute("select {} from {}.{}".format(aggExpr, dbName, rTbName))
1675
                # Then run it against the super table
1676
                if aggExpr not in ['stddev(speed)']: #TODO: STDDEV not valid for super tables?!
1677
                    dbc.execute("select {} from {}.{}".format(aggExpr, dbName, sTable.getName()))
1678
            except taos.error.ProgrammingError as err:                    
1679
                errno2 = Helper.convertErrno(err.errno)
1680
                Logging.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, dbc.getLastSql()))
1681
                raise
S
Shuduo Sang 已提交
1682

1683
class TaskDropSuperTable(StateTransitionTask):
1684
    @classmethod
1685
    def getEndState(cls):
S
Shuduo Sang 已提交
1686
        return StateDbOnly()
1687

1688 1689
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1690
        return state.canDropFixedSuperTable()
1691

1692
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1693
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized sequence
S
Shuduo Sang 已提交
1694
        if Dice.throw(2) == 0:
1695
            # print("_7_", end="", flush=True)
S
Shuduo Sang 已提交
1696 1697 1698 1699
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
1700
            isSuccess = True
S
Shuduo Sang 已提交
1701
            for i in tblSeq:
1702
                regTableName = self.getRegTableName(i)  # "db.reg_table_{}".format(i)
1703
                try:
1704 1705
                    self.execWtSql(wt, "drop table {}.{}".
                        format(self._db.getName(), regTableName))  # nRows always 0, like MySQL
S
Shuduo Sang 已提交
1706
                except taos.error.ProgrammingError as err:
1707 1708
                    # correcting for strange error number scheme                    
                    errno2 = Helper.convertErrno(err.errno)
S
Shuduo Sang 已提交
1709
                    if (errno2 in [0x362]):  # mnode invalid table name
1710
                        isSuccess = False
1711
                        Logging.debug("[DB] Acceptable error when dropping a table")
S
Shuduo Sang 已提交
1712
                    continue  # try to delete next regular table
1713 1714

                if (not tickOutput):
S
Shuduo Sang 已提交
1715 1716
                    tickOutput = True  # Print only one time
                    if isSuccess:
1717 1718
                        print("d", end="", flush=True)
                    else:
S
Shuduo Sang 已提交
1719
                        print("f", end="", flush=True)
1720 1721

        # Drop the super table itself
1722 1723
        tblName = self._db.getFixedSuperTableName()
        self.execWtSql(wt, "drop table {}.{}".format(self._db.getName(), tblName))
1724

S
Shuduo Sang 已提交
1725

1726 1727 1728
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
S
Shuduo Sang 已提交
1729
        return None  # meaning doesn't affect state
1730 1731 1732

    @classmethod
    def canBeginFrom(cls, state: AnyState):
S
Shuduo Sang 已提交
1733
        return state.canDropFixedSuperTable()  # if we can drop it, we can alter tags
1734 1735

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1736 1737
        # tblName = self._dbManager.getFixedSuperTableName()
        dbc = wt.getDbConn()
1738 1739
        sTable = self._db.getFixedSuperTable()
        dbName = self._db.getName()
1740
        dice = Dice.throw(4)
S
Shuduo Sang 已提交
1741
        if dice == 0:
1742
            sTable.addTag(dbc, dbName, "extraTag", "int")
1743
            # sql = "alter table db.{} add tag extraTag int".format(tblName)
S
Shuduo Sang 已提交
1744
        elif dice == 1:
1745
            sTable.dropTag(dbc, dbName, "extraTag")
1746
            # sql = "alter table db.{} drop tag extraTag".format(tblName)
S
Shuduo Sang 已提交
1747
        elif dice == 2:
1748
            sTable.dropTag(dbc, dbName, "newTag")
1749
            # sql = "alter table db.{} drop tag newTag".format(tblName)
S
Shuduo Sang 已提交
1750
        else:  # dice == 3
1751
            sTable.changeTag(dbc, dbName, "extraTag", "newTag")
1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767
            # sql = "alter table db.{} change tag extraTag newTag".format(tblName)

class TaskRestartService(StateTransitionTask):
    _isRunning = False
    _classLock = threading.Lock()

    @classmethod
    def getEndState(cls):
        return None  # meaning doesn't affect state

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        if gConfig.auto_start_service:
            return state.canDropFixedSuperTable()  # Basicallly when we have the super table
        return False # don't run this otherwise

1768
    CHANCE_TO_RESTART_SERVICE = 200
1769 1770 1771 1772
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        if not gConfig.auto_start_service: # only execute when we are in -a mode
            print("_a", end="", flush=True)
            return
1773

1774 1775 1776 1777 1778 1779
        with self._classLock:
            if self._isRunning:
                print("Skipping restart task, another running already")
                return
            self._isRunning = True

1780
        if Dice.throw(self.CHANCE_TO_RESTART_SERVICE) == 0: # 1 in N chance
1781 1782 1783
            dbc = wt.getDbConn()
            dbc.execute("show databases") # simple delay, align timing with other workers
            gSvcMgr.restart()
1784

1785
        self._isRunning = False
S
Shuduo Sang 已提交
1786

1787
class TaskAddData(StateTransitionTask):
S
Shuduo Sang 已提交
1788 1789
    # Track which table is being actively worked on
    activeTable: Set[int] = set()
1790

1791 1792 1793
    # We use these two files to record operations to DB, useful for power-off tests
    fAddLogReady = None # type: TextIOWrapper
    fAddLogDone  = None # type: TextIOWrapper
1794 1795 1796

    @classmethod
    def prepToRecordOps(cls):
S
Shuduo Sang 已提交
1797 1798
        if gConfig.record_ops:
            if (cls.fAddLogReady is None):
1799
                Logging.info(
S
Shuduo Sang 已提交
1800
                    "Recording in a file operations to be performed...")
1801
                cls.fAddLogReady = open("add_log_ready.txt", "w")
S
Shuduo Sang 已提交
1802
            if (cls.fAddLogDone is None):
1803
                Logging.info("Recording in a file operations completed...")
1804
                cls.fAddLogDone = open("add_log_done.txt", "w")
1805

1806
    @classmethod
1807 1808
    def getEndState(cls):
        return StateHasData()
1809 1810 1811 1812

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
S
Shuduo Sang 已提交
1813

1814
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1815 1816
        # ds = self._dbManager # Quite DANGEROUS here, may result in multi-thread client access
        db = self._db
1817
        dbc = wt.getDbConn()
1818
        tblSeq = list(range(
S
Shuduo Sang 已提交
1819 1820 1821 1822
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
        for i in tblSeq:
            if (i in self.activeTable):  # wow already active
1823
                print("x", end="", flush=True) # concurrent insertion
1824
            else:
S
Shuduo Sang 已提交
1825
                self.activeTable.add(i)  # marking it active
1826
            
1827
            sTable = db.getFixedSuperTable()
1828
            regTableName = self.getRegTableName(i)  # "db.reg_table_{}".format(i)
1829
            sTable.ensureTable(wt.getDbConn(), db.getName(), regTableName)  # Ensure the table exists           
1830 1831
           
            for j in range(self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
1832
                nextInt = db.getNextInt()
1833
                nextTick = db.getNextTick()
1834 1835
                if gConfig.record_ops:
                    self.prepToRecordOps()
1836
                    self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName))
1837 1838
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
1839 1840
                sql = "insert into {}.{} values ('{}', {});".format( # removed: tags ('{}', {})
                    db.getName(),
S
Shuduo Sang 已提交
1841
                    regTableName,
1842 1843
                    # ds.getFixedSuperTableName(),
                    # ds.getNextBinary(), ds.getNextFloat(),
1844 1845
                    nextTick, nextInt)
                dbc.execute(sql)
S
Shuduo Sang 已提交
1846 1847
                # Successfully wrote the data into the DB, let's record it
                # somehow
1848
                te.recordDataMark(nextInt)
1849
                if gConfig.record_ops:
S
Shuduo Sang 已提交
1850 1851 1852
                    self.fAddLogDone.write(
                        "Wrote {} to {}\n".format(
                            nextInt, regTableName))
1853 1854
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
1855 1856

                # Now read it back and verify, we might encounter an error if table is dropped
1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873
                if gConfig.verify_data: # only if command line asks for it
                    try:
                        readBack = dbc.queryScalar("SELECT speed from {}.{} WHERE ts= '{}'".
                            format(db.getName(), regTableName, nextTick))
                        if readBack != nextInt :
                            raise taos.error.ProgrammingError(
                                "Failed to read back same data, wrote: {}, read: {}"
                                .format(nextInt, readBack), 0x999)
                    except taos.error.ProgrammingError as err:
                        errno = Helper.convertErrno(err.errno)
                        if errno in [0x991, 0x992]  : # not a single result
                            raise taos.error.ProgrammingError(
                                "Failed to read back same data for tick: {}, wrote: {}, read: {}"
                                .format(nextTick, nextInt, "Empty Result" if errno==0x991 else "Multiple Result"),
                                errno)
                        # Re-throw no matter what
                        raise
1874 1875
                

S
Shuduo Sang 已提交
1876
            self.activeTable.discard(i)  # not raising an error, unlike remove
1877 1878


1879

1880

1881

1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909
class ThreadStacks: # stack info for all threads
    def __init__(self):
        self._allStacks = {}
        allFrames = sys._current_frames()
        for th in threading.enumerate():                        
            stack = traceback.extract_stack(allFrames[th.ident])     
            self._allStacks[th.native_id] = stack

    def print(self, filteredEndName = None, filterInternal = False):
        for thNid, stack in self._allStacks.items(): # for each thread            
            lastFrame = stack[-1]
            if filteredEndName: # we need to filter out stacks that match this name                
                if lastFrame.name == filteredEndName : # end did not match
                    continue
            if filterInternal:
                if lastFrame.name in ['wait', 'invoke_excepthook', 
                    '_wait', # The Barrier exception
                    'svcOutputReader', # the svcMgr thread
                    '__init__']: # the thread that extracted the stack
                    continue # ignore
            # Now print
            print("\n<----- Thread Info for ID: {}".format(thNid))
            for frame in stack:
                # print(frame)
                print("File {filename}, line {lineno}, in {name}".format(
                    filename=frame.filename, lineno=frame.lineno, name=frame.name))
                print("    {}".format(frame.line))
            print("-----> End of Thread Info\n")
S
Shuduo Sang 已提交
1910

1911 1912 1913
class ClientManager:
    def __init__(self):
        print("Starting service manager")
1914 1915
        # signal.signal(signal.SIGTERM, self.sigIntHandler)
        # signal.signal(signal.SIGINT, self.sigIntHandler)
1916

1917
        self._status = Status.STATUS_RUNNING
1918 1919
        self.tc = None

1920 1921
        self.inSigHandler = False

1922
    def sigIntHandler(self, signalNumber, frame):
1923
        if self._status != Status.STATUS_RUNNING:
1924 1925 1926
            print("Repeated SIGINT received, forced exit...")
            # return  # do nothing if it's already not running
            sys.exit(-1)
1927
        self._status = Status.STATUS_STOPPING  # immediately set our status
1928

1929
        print("ClientManager: Terminating program...")
1930 1931
        self.tc.requestToStop()

1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Client Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Show Threads")
            # Remember to update the if range below
            # print("Enter Choice: ", end="", flush=True)
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
            print("Invalid choice, please try again.")
            choice = ""  # reset
        return choice

    def sigUsrHandler(self, signalNumber, frame):
        print("Interrupting main thread execution upon SIGUSR1")
        if self.inSigHandler:  # already
            print("Ignoring repeated SIG_USR1...")
            return  # do nothing if it's already not running
        self.inSigHandler = True

        choice = self._doMenu()
        if choice == "1":
            print("Resuming execution...")
            time.sleep(1.0)
        elif choice == "2":
            print("Not implemented yet")
            time.sleep(1.0)
        elif choice == "3":
            ts = ThreadStacks()
            ts.print()
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))

        self.inSigHandler = False

1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001
    # TODO: need to revise how we verify data durability
    # def _printLastNumbers(self):  # to verify data durability
    #     dbManager = DbManager()
    #     dbc = dbManager.getDbConn()
    #     if dbc.query("show databases") <= 1:  # no database (we have a default called "log")
    #         return
    #     dbc.execute("use db")
    #     if dbc.query("show tables") == 0:  # no tables
    #         return

    #     sTbName = dbManager.getFixedSuperTableName()

    #     # get all regular tables
    #     # TODO: analyze result set later
    #     dbc.query("select TBNAME from db.{}".format(sTbName))
    #     rTables = dbc.getQueryResult()

    #     bList = TaskExecutor.BoundedList()
    #     for rTbName in rTables:  # regular tables
    #         dbc.query("select speed from db.{}".format(rTbName[0]))
    #         numbers = dbc.getQueryResult()
    #         for row in numbers:
    #             # print("<{}>".format(n), end="", flush=True)
    #             bList.add(row[0])

    #     print("Top numbers in DB right now: {}".format(bList))
    #     print("TDengine client execution is about to start in 2 seconds...")
    #     time.sleep(2.0)
    #     dbManager = None  # release?
2002

2003
    def run(self, svcMgr):    
2004
        # self._printLastNumbers()
2005
        global gConfig
2006

2007 2008 2009 2010
        # Prepare Tde Instance
        global gContainer
        tInst = gContainer.defTdeInstance = TdeInstance() # "subdir to hold the instance"

2011
        dbManager = DbManager(gConfig.connector_type, tInst.getDbTarget())  # Regular function
2012
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
2013
        self.tc = ThreadCoordinator(thPool, dbManager)
2014
        
2015
        print("Starting client instance to: {}".format(tInst))
2016
        self.tc.run()
S
Steven Li 已提交
2017 2018
        # print("exec stats: {}".format(self.tc.getExecStats()))
        # print("TC failed = {}".format(self.tc.isFailed()))
2019
        if svcMgr: # gConfig.auto_start_service:
2020
            svcMgr.stopTaosService()
2021
            svcMgr = None
2022 2023
        # Print exec status, etc., AFTER showing messages from the server
        self.conclude()
S
Steven Li 已提交
2024
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2025
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044
        ret = 1 if self.tc.isFailed() else 0
        self.tc.cleanup()

        # Release global variables
        gConfig = None
        gSvcMgr = None
        logger = None

        # Release variables here
        self.tc = None
        thPool = None
        dbManager = None

        gc.collect() # force garbage collection
        # h = hpy()
        # print("\n----- Final Python Heap -----\n")        
        # print(h.heap())

        return ret
2045 2046

    def conclude(self):
2047
        # self.tc.getDbManager().cleanUp() # clean up first, so we can show ZERO db connections
2048
        self.tc.printStats()
2049

2050
class MainExec:
2051 2052 2053
    def __init__(self):        
        self._clientMgr = None
        self._svcMgr = None
2054

2055 2056 2057
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT,  self.sigIntHandler)
        signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
2058

2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071
    def sigUsrHandler(self, signalNumber, frame):
        if self._clientMgr:
            self._clientMgr.sigUsrHandler(signalNumber, frame)
        elif self._svcMgr: # Only if no client mgr, we are running alone
            self._svcMgr.sigUsrHandler(signalNumber, frame)
        
    def sigIntHandler(self, signalNumber, frame):
        if self._svcMgr:
            self._svcMgr.sigIntHandler(signalNumber, frame)
        if self._clientMgr:
            self._clientMgr.sigIntHandler(signalNumber, frame)

    def runClient(self):
2072
        global gSvcMgr
2073
        if gConfig.auto_start_service:
2074
            self._svcMgr = ServiceManager()
2075
            gSvcMgr = self._svcMgr # hack alert
2076 2077 2078
            self._svcMgr.startTaosService() # we start, don't run
        
        self._clientMgr = ClientManager()
2079 2080 2081 2082
        ret = None
        try: 
            ret = self._clientMgr.run(self._svcMgr) # stop TAOS service inside
        except requests.exceptions.ConnectionError as err:
2083
            Logging.warning("Failed to open REST connection to DB: {}".format(err.getMessage()))
2084 2085
            # don't raise
        return ret
2086 2087

    def runService(self):
2088
        global gSvcMgr
2089
        self._svcMgr = ServiceManager()
2090 2091
        gSvcMgr = self._svcMgr # save it in a global variable TODO: hack alert

2092
        self._svcMgr.run() # run to some end state
2093 2094
        self._svcMgr = None 
        gSvcMgr = None        
2095

2096 2097 2098 2099
    def init(self): # TODO: refactor
        global gContainer
        gContainer = Container() # micky-mouse DI

2100 2101 2102
        global gSvcMgr # TODO: refactor away
        gSvcMgr = None

2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143
        # Super cool Python argument library:
        # https://docs.python.org/3/library/argparse.html
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=textwrap.dedent('''\
                TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
                ---------------------------------------------------------------------
                1. You build TDengine in the top level ./build directory, as described in offical docs
                2. You run the server there before this script: ./build/bin/taosd -c test/cfg

                '''))                      

        parser.add_argument(
            '-a',
            '--auto-start-service',
            action='store_true',
            help='Automatically start/stop the TDengine service (default: false)')
        parser.add_argument(
            '-b',
            '--max-dbs',
            action='store',
            default=0,
            type=int,
            help='Maximum number of DBs to keep, set to disable dropping DB. (default: 0)')
        parser.add_argument(
            '-c',
            '--connector-type',
            action='store',
            default='native',
            type=str,
            help='Connector type to use: native, rest, or mixed (default: 10)')
        parser.add_argument(
            '-d',
            '--debug',
            action='store_true',
            help='Turn on DEBUG mode for more logging (default: false)')
        parser.add_argument(
            '-e',
            '--run-tdengine',
            action='store_true',
            help='Run TDengine service in foreground (default: false)')
2144 2145 2146 2147 2148 2149 2150
        parser.add_argument(
            '-g',
            '--ignore-errors',
            action='store',
            default=None,
            type=str,
            help='Ignore error codes, comma separated, 0x supported (default: None)')
2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162
        parser.add_argument(
            '-i',
            '--max-replicas',
            action='store',
            default=1,
            type=int,
            help='Maximum number of replicas to use, when testing against clusters. (default: 1)')
        parser.add_argument(
            '-l',
            '--larger-data',
            action='store_true',
            help='Write larger amount of data during write operations (default: false)')
2163 2164 2165 2166
        parser.add_argument(
            '-n',
            '--dynamic-db-table-names',
            action='store_true',
2167
            help='Use non-fixed names for dbs/tables, useful for multi-instance executions (default: false)')        
2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204
        parser.add_argument(
            '-p',
            '--per-thread-db-connection',
            action='store_true',
            help='Use a single shared db connection (default: false)')
        parser.add_argument(
            '-r',
            '--record-ops',
            action='store_true',
            help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
        parser.add_argument(
            '-s',
            '--max-steps',
            action='store',
            default=1000,
            type=int,
            help='Maximum number of steps to run (default: 100)')
        parser.add_argument(
            '-t',
            '--num-threads',
            action='store',
            default=5,
            type=int,
            help='Number of threads to run (default: 10)')
        parser.add_argument(
            '-v',
            '--verify-data',
            action='store_true',
            help='Verify data written in a number of places by reading back (default: false)')
        parser.add_argument(
            '-x',
            '--continue-on-exception',
            action='store_true',
            help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')

        global gConfig
        gConfig = parser.parse_args()
S
Shuduo Sang 已提交
2205

2206
        Logging.clsInit(gConfig)
2207 2208 2209 2210 2211 2212 2213 2214

        Dice.seed(0)  # initial seeding of dice

    def run(self):
        if gConfig.run_tdengine:  # run server
            self.runService()
        else:
            return self.runClient()
S
Steven Li 已提交
2215

2216

2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237
class Container():
    _propertyList = {'defTdeInstance'}

    def __init__(self):
        self._cargo = {} # No cargo at the beginning

    def _verifyValidProperty(self, name):
        if not name in self._propertyList:
            raise CrashGenError("Invalid container property: {}".format(name))

    # Called for an attribute, when other mechanisms fail (compare to  __getattribute__)
    def __getattr__(self, name):
        self._verifyValidProperty(name)
        return self._cargo[name] # just a simple lookup

    def __setattr__(self, name, value):
        if name == '_cargo' : # reserved vars
            super().__setattr__(name, value)
            return
        self._verifyValidProperty(name)
        self._cargo[name] = value
S
Shuduo Sang 已提交
2238