crash_gen.py 110.3 KB
Newer Older
S
Shuduo Sang 已提交
1
# -----!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
S
Shuduo Sang 已提交
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
# For type hinting before definition, ref:
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations
import taos
import crash_gen
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.log import *
from queue import Queue, Empty
from typing import IO
from typing import Set
from typing import Dict
from typing import List
from requests.auth import HTTPBasicAuth
import textwrap
import datetime
import logging
import time
import random
import threading
import requests
import copy
import argparse
import getopt
39

S
Steven Li 已提交
40
import sys
41
import os
42 43
import io
import signal
44
import traceback
45 46 47 48 49 50 51

try:
    import psutil
except:
    print("Psutil module needed, please install: sudo pip3 install psutil")
    sys.exit(-1)

52 53 54 55
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
56

S
Shuduo Sang 已提交
57
# Global variables, tried to keep a small number.
58 59 60

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
S
Shuduo Sang 已提交
61
gConfig = argparse.Namespace()  # Dummy value, will be replaced later
62
gSvcMgr = None # TODO: refactor this hack, use dep injection
63
logger = None
S
Steven Li 已提交
64

S
Shuduo Sang 已提交
65
def runThread(wt: WorkerThread):
66
    wt.run()
67

68 69
class CrashGenError(Exception):
    def __init__(self, msg=None, errno=None):
S
Shuduo Sang 已提交
70
        self.msg = msg
71
        self.errno = errno
S
Shuduo Sang 已提交
72

73 74 75
    def __str__(self):
        return self.msg

S
Shuduo Sang 已提交
76

S
Steven Li 已提交
77
class WorkerThread:
78
    def __init__(self, pool: ThreadPool, tid, tc: ThreadCoordinator,
S
Shuduo Sang 已提交
79 80 81
                 # te: TaskExecutor,
                 ):  # note: main thread context!
        # self._curStep = -1
82
        self._pool = pool
S
Shuduo Sang 已提交
83 84
        self._tid = tid
        self._tc = tc  # type: ThreadCoordinator
S
Steven Li 已提交
85
        # self.threadIdent = threading.get_ident()
86 87
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()
S
Steven Li 已提交
88

89
        # Let us have a DB connection of our own
S
Shuduo Sang 已提交
90
        if (gConfig.per_thread_db_connection):  # type: ignore
91
            # print("connector_type = {}".format(gConfig.connector_type))
92 93 94 95 96 97 98 99 100 101 102
            if gConfig.connector_type == 'native':
                self._dbConn = DbConn.createNative() 
            elif gConfig.connector_type == 'rest':
                self._dbConn = DbConn.createRest() 
            elif gConfig.connector_type == 'mixed':
                if Dice.throw(2) == 0: # 1/2 chance
                    self._dbConn = DbConn.createNative() 
                else:
                    self._dbConn = DbConn.createRest() 
            else:
                raise RuntimeError("Unexpected connector type: {}".format(gConfig.connector_type))
103

S
Shuduo Sang 已提交
104
        self._dbInUse = False  # if "use db" was executed already
105

106
    def logDebug(self, msg):
S
Steven Li 已提交
107
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
108 109

    def logInfo(self, msg):
S
Steven Li 已提交
110
        logger.info("    TRD[{}] {}".format(self._tid, msg))
111

112 113 114 115
    def dbInUse(self):
        return self._dbInUse

    def useDb(self):
S
Shuduo Sang 已提交
116
        if (not self._dbInUse):
117 118 119
            self.execSql("use db")
        self._dbInUse = True

120
    def getTaskExecutor(self):
S
Shuduo Sang 已提交
121
        return self._tc.getTaskExecutor()
122

S
Steven Li 已提交
123
    def start(self):
124
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
125

S
Shuduo Sang 已提交
126
    def run(self):
S
Steven Li 已提交
127
        # initialization after thread starts, in the thread context
128
        # self.isSleeping = False
129 130
        logger.info("Starting to run thread: {}".format(self._tid))

S
Shuduo Sang 已提交
131
        if (gConfig.per_thread_db_connection):  # type: ignore
132
            logger.debug("Worker thread openning database connection")
133
            self._dbConn.open()
S
Steven Li 已提交
134

S
Shuduo Sang 已提交
135 136
        self._doTaskLoop()

137
        # clean up
S
Shuduo Sang 已提交
138
        if (gConfig.per_thread_db_connection):  # type: ignore
139 140 141 142
            if self._dbConn.isOpen: #sometimes it is not open
                self._dbConn.close()
            else:
                logger.warning("Cleaning up worker thread, dbConn already closed")
143

S
Shuduo Sang 已提交
144
    def _doTaskLoop(self):
145 146
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
S
Shuduo Sang 已提交
147 148
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
149 150 151
            try:
                tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
            except threading.BrokenBarrierError as err: # main thread timed out
152
                print("_bto", end="")
153 154 155
                logger.debug("[TRD] Worker thread exiting due to main thread barrier time-out")
                break

156
            logger.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid))
157
            self.crossStepGate()   # then per-thread gate, after being tapped
158
            logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
159
            if not self._tc.isRunning():
160
                print("_wts", end="")
161
                logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
162 163
                break

164 165 166 167 168
            # Before we fetch the task and run it, let's ensure we properly "use" the database
            try:
                self.useDb() # might encounter exceptions. TODO: catch
            except taos.error.ProgrammingError as err:
                errno = Helper.convertErrno(err.errno)
169
                if errno in [0x383, 0x386, 0x00B, 0x014]  : # invalid database, dropping, Unable to establish connection, Database not ready
170 171 172 173 174 175
                    # ignore
                    dummy = 0
                else:
                    print("\nCaught programming error. errno=0x{:X}, msg={} ".format(errno, err.msg))
                    raise

176
            # Fetch a task from the Thread Coordinator
177
            logger.debug( "[TRD] Worker thread [{}] about to fetch task".format(self._tid))
178
            task = tc.fetchTask()
179 180

            # Execute such a task
S
Shuduo Sang 已提交
181 182 183
            logger.debug(
                "[TRD] Worker thread [{}] about to execute task: {}".format(
                    self._tid, task.__class__.__name__))
184
            task.execute(self)
185
            tc.saveExecutedTask(task)
186
            logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))
S
Shuduo Sang 已提交
187 188

            self._dbInUse = False  # there may be changes between steps
189
        # print("_wtd", end=None) # worker thread died
190

S
Shuduo Sang 已提交
191 192
    def verifyThreadSelf(self):  # ensure we are called by this own thread
        if (threading.get_ident() != self._thread.ident):
S
Steven Li 已提交
193 194
            raise RuntimeError("Unexpectly called from other threads")

S
Shuduo Sang 已提交
195 196
    def verifyThreadMain(self):  # ensure we are called by the main thread
        if (threading.get_ident() != threading.main_thread().ident):
S
Steven Li 已提交
197 198 199
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
S
Shuduo Sang 已提交
200
        if (not self._thread.is_alive()):
S
Steven Li 已提交
201 202
            raise RuntimeError("Unexpected dead thread")

203
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
204 205
    def crossStepGate(self):
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
206 207
        self.verifyThreadSelf()  # only allowed by ourselves

208
        # Wait again at the "gate", waiting to be "tapped"
S
Shuduo Sang 已提交
209 210 211 212
        logger.debug(
            "[TRD] Worker thread {} about to cross the step gate".format(
                self._tid))
        self._stepGate.wait()
213
        self._stepGate.clear()
S
Shuduo Sang 已提交
214

215
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
216

S
Shuduo Sang 已提交
217
    def tapStepGate(self):  # give it a tap, release the thread waiting there
218
        # self.verifyThreadAlive()
S
Shuduo Sang 已提交
219 220
        self.verifyThreadMain()  # only allowed for main thread

221 222 223 224 225 226
        if self._thread.is_alive():
            logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
            self._stepGate.set()  # wake up!
            time.sleep(0)  # let the released thread run a bit
        else:
            print("_tad", end="") # Thread already dead
227

S
Shuduo Sang 已提交
228
    def execSql(self, sql):  # TODO: expose DbConn directly
229
        return self.getDbConn().execute(sql)
230

S
Shuduo Sang 已提交
231
    def querySql(self, sql):  # TODO: expose DbConn directly
232
        return self.getDbConn().query(sql)
233 234

    def getQueryResult(self):
235
        return self.getDbConn().getQueryResult()
236

237
    def getDbConn(self):
S
Shuduo Sang 已提交
238 239
        if (gConfig.per_thread_db_connection):
            return self._dbConn
240
        else:
241
            return self._tc.getDbManager().getDbConn()
242

243 244
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
S
Shuduo Sang 已提交
245
    #         return self._dbConn.query(sql)
246 247
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
248

249
# The coordinator of all worker threads, mostly running in main thread
S
Shuduo Sang 已提交
250 251


252
class ThreadCoordinator:
S
Steven Li 已提交
253
    WORKER_THREAD_TIMEOUT = 60 # one minute
254

255
    def __init__(self, pool: ThreadPool, dbManager):
S
Shuduo Sang 已提交
256
        self._curStep = -1  # first step is 0
257
        self._pool = pool
258
        # self._wd = wd
S
Shuduo Sang 已提交
259
        self._te = None  # prepare for every new step
260
        self._dbManager = dbManager
S
Shuduo Sang 已提交
261 262
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things
S
Steven Li 已提交
263

S
Shuduo Sang 已提交
264 265
        self._stepBarrier = threading.Barrier(
            self._pool.numThreads + 1)  # one barrier for all threads
266
        self._execStats = ExecutionStats()
267
        self._runStatus = MainExec.STATUS_RUNNING
S
Steven Li 已提交
268

269 270 271
    def getTaskExecutor(self):
        return self._te

S
Shuduo Sang 已提交
272
    def getDbManager(self) -> DbManager:
273
        return self._dbManager
274

275 276
    def crossStepBarrier(self, timeout=None):
        self._stepBarrier.wait(timeout) 
277

278 279 280 281
    def requestToStop(self):
        self._runStatus = MainExec.STATUS_STOPPING
        self._execStats.registerFailure("User Interruption")

282
    def _runShouldEnd(self, transitionFailed, hasAbortedTask, workerTimeout):
283 284 285 286 287 288 289 290 291
        maxSteps = gConfig.max_steps  # type: ignore
        if self._curStep >= (maxSteps - 1): # maxStep==10, last curStep should be 9
            return True
        if self._runStatus != MainExec.STATUS_RUNNING:
            return True
        if transitionFailed:
            return True
        if hasAbortedTask:
            return True
292 293
        if workerTimeout:
            return True
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
        return False

    def _hasAbortedTask(self): # from execution of previous step
        for task in self._executedTasks:
            if task.isAborted():
                # print("Task aborted: {}".format(task))
                # hasAbortedTask = True
                return True
        return False

    def _releaseAllWorkerThreads(self, transitionFailed):
        self._curStep += 1  # we are about to get into next step. TODO: race condition here!
        # Now not all threads had time to go to sleep
        logger.debug(
            "--\r\n\n--> Step {} starts with main thread waking up".format(self._curStep))

        # A new TE for the new step
        self._te = None # set to empty first, to signal worker thread to stop
        if not transitionFailed:  # only if not failed
            self._te = TaskExecutor(self._curStep)

        logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(
                self._curStep))  # Now not all threads had time to go to sleep
        # Worker threads will wake up at this point, and each execute it's own task
        self.tapAllThreads() # release all worker thread from their "gate"

    def _syncAtBarrier(self):
         # Now main thread (that's us) is ready to enter a step
        # let other threads go past the pool barrier, but wait at the
        # thread gate
        logger.debug("[TRD] Main thread about to cross the barrier")
325
        self.crossStepBarrier(timeout=self.WORKER_THREAD_TIMEOUT)
326 327 328 329 330 331 332 333 334 335 336 337 338
        self._stepBarrier.reset()  # Other worker threads should now be at the "gate"
        logger.debug("[TRD] Main thread finished crossing the barrier")

    def _doTransition(self):
        transitionFailed = False
        try:
            sm = self._dbManager.getStateMachine()
            logger.debug("[STT] starting transitions")
            # at end of step, transiton the DB state
            sm.transition(self._executedTasks)
            logger.debug("[STT] transition ended")
            # Due to limitation (or maybe not) of the Python library,
            # we cannot share connections across threads
339 340 341 342 343 344
            # Here we are in main thread, we cannot operate the connections created in workers
            # Moving below to task loop
            # if sm.hasDatabase():
            #     for t in self._pool.threadList:
            #         logger.debug("[DB] use db for all worker threads")
            #         t.useDb()
345 346 347 348 349 350 351 352 353 354 355 356 357
                    # t.execSql("use db") # main thread executing "use
                    # db" on behalf of every worker thread
        except taos.error.ProgrammingError as err:
            if (err.msg == 'network unavailable'):  # broken DB connection
                logger.info("DB connection broken, execution failed")
                traceback.print_stack()
                transitionFailed = True
                self._te = None  # Not running any more
                self._execStats.registerFailure("Broken DB Connection")
                # continue # don't do that, need to tap all threads at
                # end, and maybe signal them to stop
            else:
                raise
358
        return transitionFailed
359 360 361 362 363 364

        self.resetExecutedTasks()  # clear the tasks after we are done
        # Get ready for next step
        logger.debug("<-- Step {} finished, trasition failed = {}".format(self._curStep, transitionFailed))
        return transitionFailed

S
Shuduo Sang 已提交
365
    def run(self):
366
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
367 368

        # Coordinate all threads step by step
S
Shuduo Sang 已提交
369
        self._curStep = -1  # not started yet
370
        
S
Shuduo Sang 已提交
371
        self._execStats.startExec()  # start the stop watch
372 373
        transitionFailed = False
        hasAbortedTask = False
374 375
        workerTimeout = False
        while not self._runShouldEnd(transitionFailed, hasAbortedTask, workerTimeout):
376
            if not gConfig.debug: # print this only if we are not in debug mode                
S
Shuduo Sang 已提交
377
                print(".", end="", flush=True)
378
                        
379 380 381 382 383 384 385 386 387 388
            try:
                self._syncAtBarrier() # For now just cross the barrier
            except threading.BrokenBarrierError as err:
                logger.info("Main loop aborted, caused by worker thread time-out")
                self._execStats.registerFailure("Aborted due to worker thread timeout")
                print("\n\nWorker Thread time-out detected, important thread info:")
                ts = ThreadStacks()
                ts.print(filterInternal=True)
                workerTimeout = True
                break
389 390

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
S
Shuduo Sang 已提交
391 392
            # We use this period to do house keeping work, when all worker
            # threads are QUIET.
393 394 395
            hasAbortedTask = self._hasAbortedTask() # from previous step
            if hasAbortedTask: 
                logger.info("Aborted task encountered, exiting test program")
396
                self._execStats.registerFailure("Aborted Task Encountered")
397
                break # do transition only if tasks are error free
S
Shuduo Sang 已提交
398

399
            # Ending previous step
400 401 402 403
            try:
                transitionFailed = self._doTransition() # To start, we end step -1 first
            except taos.error.ProgrammingError as err:
                transitionFailed = True
404
                errno2 = Helper.convertErrno(err.errno)  # correct error scheme
S
Steven Li 已提交
405 406 407
                errMsg = "Transition failed: errno=0x{:X}, msg: {}".format(errno2, err)
                logger.info(errMsg)
                self._execStats.registerFailure(errMsg)
408

409 410
            # Then we move on to the next step
            self._releaseAllWorkerThreads(transitionFailed)                    
411

412 413
        if hasAbortedTask or transitionFailed : # abnormal ending, workers waiting at "gate"
            logger.debug("Abnormal ending of main thraed")
414 415
        elif workerTimeout:
            logger.debug("Abnormal ending of main thread, due to worker timeout")
416 417 418
        else: # regular ending, workers waiting at "barrier"
            logger.debug("Regular ending, main thread waiting for all worker threads to stop...")
            self._syncAtBarrier()
419

420 421 422
        self._te = None  # No more executor, time to end
        logger.debug("Main thread tapping all threads one last time...")
        self.tapAllThreads()  # Let the threads run one last time
423

424
        logger.debug("\r\n\n--> Main thread ready to finish up...")
425
        logger.debug("Main thread joining all threads")
S
Shuduo Sang 已提交
426
        self._pool.joinAll()  # Get all threads to finish
427
        logger.info("\nAll worker threads finished")
428 429
        self._execStats.endExec()

430 431
    def printStats(self):
        self._execStats.printStats()
S
Steven Li 已提交
432

S
Steven Li 已提交
433 434 435 436 437 438
    def isFailed(self):
        return self._execStats.isFailed()

    def getExecStats(self):
        return self._execStats

S
Shuduo Sang 已提交
439
    def tapAllThreads(self):  # in a deterministic manner
S
Steven Li 已提交
440
        wakeSeq = []
S
Shuduo Sang 已提交
441 442
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
S
Steven Li 已提交
443 444 445
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
S
Shuduo Sang 已提交
446 447 448
        logger.debug(
            "[TRD] Main thread waking up worker threads: {}".format(
                str(wakeSeq)))
449
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
450
        for i in wakeSeq:
S
Shuduo Sang 已提交
451 452 453
            # TODO: maybe a bit too deep?!
            self._pool.threadList[i].tapStepGate()
            time.sleep(0)  # yield
S
Steven Li 已提交
454

455
    def isRunning(self):
S
Shuduo Sang 已提交
456
        return self._te is not None
457

S
Shuduo Sang 已提交
458 459
    def fetchTask(self) -> Task:
        if (not self.isRunning()):  # no task
460
            raise RuntimeError("Cannot fetch task when not running")
461 462
        # return self._wd.pickTask()
        # Alternatively, let's ask the DbState for the appropriate task
463 464 465 466 467 468 469
        # dbState = self.getDbState()
        # tasks = dbState.getTasksAtState() # TODO: create every time?
        # nTasks = len(tasks)
        # i = Dice.throw(nTasks)
        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
        # return tasks[i].clone() # TODO: still necessary?
S
Shuduo Sang 已提交
470 471 472 473 474
        # pick a task type for current state
        taskType = self.getDbManager().getStateMachine().pickTaskType()
        return taskType(
            self.getDbManager(),
            self._execStats)  # create a task from it
475 476

    def resetExecutedTasks(self):
S
Shuduo Sang 已提交
477
        self._executedTasks = []  # should be under single thread
478 479 480 481

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
482 483

# We define a class to run a number of threads in locking steps.
S
Shuduo Sang 已提交
484

485 486 487 488
class Helper:
    @classmethod
    def convertErrno(cls, errno):
        return errno if (errno > 0) else 0x80000000 + errno
S
Shuduo Sang 已提交
489

490
class ThreadPool:
491
    def __init__(self, numThreads, maxSteps):
492 493 494 495
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
S
Shuduo Sang 已提交
496 497
        self.threadList = []  # type: List[WorkerThread]

498
    # starting to run all the threads, in locking steps
499
    def createAndStartThreads(self, tc: ThreadCoordinator):
S
Shuduo Sang 已提交
500 501
        for tid in range(0, self.numThreads):  # Create the threads
            workerThread = WorkerThread(self, tid, tc)
502
            self.threadList.append(workerThread)
S
Shuduo Sang 已提交
503
            workerThread.start()  # start, but should block immediately before step 0
504 505 506 507 508 509

    def joinAll(self):
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

510 511
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Shuduo Sang 已提交
512 513


S
Steven Li 已提交
514 515
class LinearQueue():
    def __init__(self):
516
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
517
        self.lastIndex = 0
S
Shuduo Sang 已提交
518 519
        self._lock = threading.RLock()  # our functions may call each other
        self.inUse = set()  # the indexes that are in use right now
S
Steven Li 已提交
520

521
    def toText(self):
S
Shuduo Sang 已提交
522 523
        return "[{}..{}], in use: {}".format(
            self.firstIndex, self.lastIndex, self.inUse)
524 525

    # Push (add new element, largest) to the tail, and mark it in use
S
Shuduo Sang 已提交
526
    def push(self):
527
        with self._lock:
S
Shuduo Sang 已提交
528 529
            # if ( self.isEmpty() ):
            #     self.lastIndex = self.firstIndex
530
            #     return self.firstIndex
531 532
            # Otherwise we have something
            self.lastIndex += 1
533 534
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
535
            return self.lastIndex
S
Steven Li 已提交
536 537

    def pop(self):
538
        with self._lock:
S
Shuduo Sang 已提交
539 540 541 542
            if (self.isEmpty()):
                # raise RuntimeError("Cannot pop an empty queue")
                return False  # TODO: None?

543
            index = self.firstIndex
S
Shuduo Sang 已提交
544
            if (index in self.inUse):
545 546
                return False

547 548 549 550 551 552 553
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
554
        with self._lock:
555 556 557 558
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
559
    def allocate(self, i):
560
        with self._lock:
561
            # logger.debug("LQ allocating item {}".format(i))
S
Shuduo Sang 已提交
562 563 564
            if (i in self.inUse):
                raise RuntimeError(
                    "Cannot re-use same index in queue: {}".format(i))
565 566
            self.inUse.add(i)

S
Steven Li 已提交
567
    def release(self, i):
568
        with self._lock:
569
            # logger.debug("LQ releasing item {}".format(i))
S
Shuduo Sang 已提交
570
            self.inUse.remove(i)  # KeyError possible, TODO: why?
571 572 573 574

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
575
    def pickAndAllocate(self):
S
Shuduo Sang 已提交
576
        if (self.isEmpty()):
577 578
            return None
        with self._lock:
S
Shuduo Sang 已提交
579
            cnt = 0  # counting the interations
580 581
            while True:
                cnt += 1
S
Shuduo Sang 已提交
582
                if (cnt > self.size() * 10):  # 10x iteration already
583 584
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
S
Shuduo Sang 已提交
585 586
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if (ret not in self.inUse):
587 588 589
                    self.allocate(ret)
                    return ret

S
Shuduo Sang 已提交
590

591
class DbConn:
592
    TYPE_NATIVE = "native-c"
593
    TYPE_REST =   "rest-api"
594 595 596 597 598 599 600 601 602
    TYPE_INVALID = "invalid"

    @classmethod
    def create(cls, connType):
        if connType == cls.TYPE_NATIVE:
            return DbConnNative()
        elif connType == cls.TYPE_REST:
            return DbConnRest()
        else:
S
Shuduo Sang 已提交
603 604
            raise RuntimeError(
                "Unexpected connection type: {}".format(connType))
605 606 607 608 609 610 611 612 613

    @classmethod
    def createNative(cls):
        return cls.create(cls.TYPE_NATIVE)

    @classmethod
    def createRest(cls):
        return cls.create(cls.TYPE_REST)

614 615
    def __init__(self):
        self.isOpen = False
616
        self._type = self.TYPE_INVALID
617 618 619 620
        self._lastSql = None

    def getLastSql(self):
        return self._lastSql
621 622

    def open(self):
S
Shuduo Sang 已提交
623
        if (self.isOpen):
624 625
            raise RuntimeError("Cannot re-open an existing DB connection")

626 627
        # below implemented by child classes
        self.openByType()
628

629
        logger.debug("[DB] data connection opened, type = {}".format(self._type))
630 631
        self.isOpen = True

S
Shuduo Sang 已提交
632 633
    def resetDb(self):  # reset the whole database, etc.
        if (not self.isOpen):
634
            raise RuntimeError("Cannot reset database until connection is open")
635 636
        # self._tdSql.prepare() # Recreate database, etc.

637
        self.execute('drop database if exists db')
638 639
        logger.debug("Resetting DB, dropped database")
        # self._cursor.execute('create database db')
640
        # self._cursor.execute('use db')
641 642
        # tdSql.execute('show databases')

S
Shuduo Sang 已提交
643
    def queryScalar(self, sql) -> int:
644 645
        return self._queryAny(sql)

S
Shuduo Sang 已提交
646
    def queryString(self, sql) -> str:
647 648
        return self._queryAny(sql)

S
Shuduo Sang 已提交
649 650
    def _queryAny(self, sql):  # actual query result as an int
        if (not self.isOpen):
651
            raise RuntimeError("Cannot query database until connection is open")
652
        nRows = self.query(sql)
S
Shuduo Sang 已提交
653
        if nRows != 1:
654
            raise RuntimeError("Unexpected result for query: {}, rows = {}".format(sql, nRows))
655
        if self.getResultRows() != 1 or self.getResultCols() != 1:
656
            raise RuntimeError("Unexpected result set for query: {}".format(sql))
657 658
        return self.getQueryResult()[0][0]

659 660 661 662
    def use(self, dbName):
        self.execute("use {}".format(dbName))

    def hasDatabases(self):
663
        return self.query("show databases") > 1 # We now have a "log" database by default
664 665 666 667

    def hasTables(self):
        return self.query("show tables") > 0

668 669
    def execute(self, sql):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
670

671 672 673
    def query(self, sql) -> int: # return num rows returned
        raise RuntimeError("Unexpected execution, should be overriden")

674 675
    def openByType(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
676

677 678
    def getQueryResult(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
679

680 681
    def getResultRows(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
682

683 684 685 686
    def getResultCols(self):
        raise RuntimeError("Unexpected execution, should be overriden")

# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql
S
Shuduo Sang 已提交
687 688


689 690 691 692
class DbConnRest(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
693
        self._url = "http://localhost:6020/rest/sql"  # fixed for now
694 695
        self._result = None

S
Shuduo Sang 已提交
696 697 698
    def openByType(self):  # Open connection
        pass  # do nothing, always open

699
    def close(self):
S
Shuduo Sang 已提交
700
        if (not self.isOpen):
701
            raise RuntimeError("Cannot clean up database until connection is open")
702 703 704 705 706
        # Do nothing for REST
        logger.debug("[DB] REST Database connection closed")
        self.isOpen = False

    def _doSql(self, sql):
707
        self._lastSql = sql # remember this, last SQL attempted
708 709 710
        try:
            r = requests.post(self._url, 
                data = sql,
711
                auth = HTTPBasicAuth('root', 'taosdata'))         
712 713 714
        except:
            print("REST API Failure (TODO: more info here)")
            raise
715 716
        rj = r.json()
        # Sanity check for the "Json Result"
S
Shuduo Sang 已提交
717
        if ('status' not in rj):
718 719
            raise RuntimeError("No status in REST response")

S
Shuduo Sang 已提交
720 721 722 723
        if rj['status'] == 'error':  # clearly reported error
            if ('code' not in rj):  # error without code
                raise RuntimeError("REST error return without code")
            errno = rj['code']  # May need to massage this in the future
724
            # print("Raising programming error with REST return: {}".format(rj))
S
Shuduo Sang 已提交
725 726
            raise taos.error.ProgrammingError(
                rj['desc'], errno)  # todo: check existance of 'desc'
727

S
Shuduo Sang 已提交
728 729 730 731
        if rj['status'] != 'succ':  # better be this
            raise RuntimeError(
                "Unexpected REST return status: {}".format(
                    rj['status']))
732 733

        nRows = rj['rows'] if ('rows' in rj) else 0
S
Shuduo Sang 已提交
734
        self._result = rj
735 736
        return nRows

S
Shuduo Sang 已提交
737 738 739 740
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
741 742
        logger.debug("[SQL-REST] Executing SQL: {}".format(sql))
        nRows = self._doSql(sql)
S
Shuduo Sang 已提交
743 744
        logger.debug(
            "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
745 746
        return nRows

S
Shuduo Sang 已提交
747
    def query(self, sql):  # return rows affected
748 749 750 751 752 753 754 755 756 757 758 759 760
        return self.execute(sql)

    def getQueryResult(self):
        return self._result['data']

    def getResultRows(self):
        print(self._result)
        raise RuntimeError("TBD")
        # return self._tdSql.queryRows

    def getResultCols(self):
        print(self._result)
        raise RuntimeError("TBD")
S
Shuduo Sang 已提交
761

762
    # Duplicate code from TDMySQL, TODO: merge all this into DbConnNative
763 764


765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792
class MyTDSql:
    def __init__(self):
        self.queryRows = 0
        self.queryCols = 0
        self.affectedRows = 0

    def init(self, cursor, log=True):
        self.cursor = cursor
        # if (log):
        #     caller = inspect.getframeinfo(inspect.stack()[1][0])
        #     self.cursor.log(caller.filename + ".sql")

    def close(self):
        self.cursor.close()

    def query(self, sql):
        self.sql = sql
        try:
            self.cursor.execute(sql)
            self.queryResult = self.cursor.fetchall()
            self.queryRows = len(self.queryResult)
            self.queryCols = len(self.cursor.description)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.queryRows
793

794 795 796 797 798 799 800 801 802 803 804
    def execute(self, sql):
        self.sql = sql
        try:
            self.affectedRows = self.cursor.execute(sql)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.affectedRows

S
Shuduo Sang 已提交
805

806
class DbConnNative(DbConn):
807 808 809 810
    # Class variables
    _lock = threading.Lock()
    _connInfoDisplayed = False

811 812
    def __init__(self):
        super().__init__()
813
        self._type = self.TYPE_NATIVE
S
Shuduo Sang 已提交
814
        self._conn = None
815
        self._cursor = None        
S
Shuduo Sang 已提交
816

817 818 819 820 821 822 823
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

824
        buildPath = None
825 826 827 828
        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
S
Shuduo Sang 已提交
829
                    buildPath = root[:len(root) - len("/build/bin")]
830
                    break
831
        if buildPath == None:
832
            raise RuntimeError("Failed to determine buildPath, selfPath={}, projPath={}".format(selfPath, projPath))
833 834
        return buildPath

835
    
S
Shuduo Sang 已提交
836
    def openByType(self):  # Open connection
837
        cfgPath = self.getBuildPath() + "/test/cfg"
838
        hostAddr = "127.0.0.1"
839

840 841 842 843 844
        with self._lock: # force single threading for opening DB connections
            if not self._connInfoDisplayed:
                self.__class__._connInfoDisplayed = True # updating CLASS variable
                logger.info("Initiating TAOS native connection to {}, using config at {}".format(hostAddr, cfgPath))
            
845
            self._conn = taos.connect(host=hostAddr, config=cfgPath)  # TODO: make configurable
846 847
            self._cursor = self._conn.cursor()
        
848
        self._cursor.execute('reset query cache')
S
Shuduo Sang 已提交
849
        # self._cursor.execute('use db') # do this at the beginning of every
850 851

        # Open connection
852
        self._tdSql = MyTDSql()
853
        self._tdSql.init(self._cursor)
S
Shuduo Sang 已提交
854

855
    def close(self):
S
Shuduo Sang 已提交
856
        if (not self.isOpen):
857
            raise RuntimeError("Cannot clean up database until connection is open")
858
        self._tdSql.close()
859
        logger.debug("[DB] Database connection closed")
860
        self.isOpen = False
S
Steven Li 已提交
861

S
Shuduo Sang 已提交
862 863 864 865
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
866
        logger.debug("[SQL] Executing SQL: {}".format(sql))
867
        self._lastSql = sql
868
        nRows = self._tdSql.execute(sql)
S
Shuduo Sang 已提交
869 870 871
        logger.debug(
            "[SQL] Execution Result, nRows = {}, SQL = {}".format(
                nRows, sql))
872
        return nRows
S
Steven Li 已提交
873

S
Shuduo Sang 已提交
874 875 876 877
    def query(self, sql):  # return rows affected
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
878
        logger.debug("[SQL] Executing SQL: {}".format(sql))
879
        self._lastSql = sql
880
        nRows = self._tdSql.query(sql)
S
Shuduo Sang 已提交
881 882 883
        logger.debug(
            "[SQL] Query Result, nRows = {}, SQL = {}".format(
                nRows, sql))
884
        return nRows
885
        # results are in: return self._tdSql.queryResult
886

887 888 889
    def getQueryResult(self):
        return self._tdSql.queryResult

890 891
    def getResultRows(self):
        return self._tdSql.queryRows
892

893 894
    def getResultCols(self):
        return self._tdSql.queryCols
895

S
Shuduo Sang 已提交
896

897
class AnyState:
S
Shuduo Sang 已提交
898 899 900
    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, no even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
901
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
S
Shuduo Sang 已提交
902
    STATE_HAS_DATA = 3  # we have some data in the table
903 904 905 906 907
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
908 909
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
910 911 912 913 914 915 916
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Shuduo Sang 已提交
917 918
        # -1 hack to accomodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]
919 920 921 922

    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
923 924 925 926 927 928
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
S
Shuduo Sang 已提交
929 930 931
            raise RuntimeError(
                "Unexpected comparison, type = {}".format(
                    type(other)))
S
Steven Li 已提交
932

933 934 935
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
936 937 938
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

939 940
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
S
Shuduo Sang 已提交
941

942 943
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
S
Shuduo Sang 已提交
944

945 946
    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]
S
Shuduo Sang 已提交
947

948 949
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
950

951 952
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
953

954 955
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
S
Shuduo Sang 已提交
956

957 958 959 960 961
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
S
Shuduo Sang 已提交
962
        for task in tasks:
963 964 965
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
966
                # task.logDebug("Task success found")
967
                sCnt += 1
S
Shuduo Sang 已提交
968 969 970
                if (sCnt >= 2):
                    raise RuntimeError(
                        "Unexpected more than 1 success with task: {}".format(cls))
971 972 973 974

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
S
Shuduo Sang 已提交
975
        for task in tasks:
976 977
            if not isinstance(task, cls):
                continue
S
Shuduo Sang 已提交
978
            exists = True  # we have a valid instance
979 980
            if task.isSuccess():
                sCnt += 1
S
Shuduo Sang 已提交
981 982 983
        if (exists and sCnt <= 0):
            raise RuntimeError(
                "Unexpected zero success for task: {}".format(cls))
984 985

    def assertNoTask(self, tasks, cls):
S
Shuduo Sang 已提交
986
        for task in tasks:
987
            if isinstance(task, cls):
S
Shuduo Sang 已提交
988 989
                raise CrashGenError(
                    "This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))
990 991

    def assertNoSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
992
        for task in tasks:
993 994
            if isinstance(task, cls):
                if task.isSuccess():
S
Shuduo Sang 已提交
995 996
                    raise RuntimeError(
                        "Unexpected successful task: {}".format(cls))
997 998

    def hasSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
999
        for task in tasks:
1000 1001 1002 1003 1004 1005
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
1006
    def hasTask(self, tasks, cls):
S
Shuduo Sang 已提交
1007
        for task in tasks:
S
Steven Li 已提交
1008 1009 1010 1011
            if isinstance(task, cls):
                return True
        return False

S
Shuduo Sang 已提交
1012

1013 1014 1015 1016
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
S
Shuduo Sang 已提交
1017 1018 1019
            False, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
1020 1021 1022 1023
        ]

    # def verifyTasksToState(self, tasks, newState):

S
Shuduo Sang 已提交
1024

1025 1026 1027 1028
class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
S
Shuduo Sang 已提交
1029 1030 1031
            True, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
1032 1033
        ]

S
Shuduo Sang 已提交
1034 1035
    def verifyTasksToState(self, tasks, newState):
        if (self.hasSuccess(tasks, TaskCreateDb)
1036
                ):  # at EMPTY, if there's succes in creating DB
S
Shuduo Sang 已提交
1037 1038 1039 1040
            if (not self.hasTask(tasks, TaskDropDb)):  # and no drop_db tasks
                # we must have at most one. TODO: compare numbers
                self.assertAtMostOneSuccess(tasks, TaskCreateDb)

1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1052 1053 1054
        if (not self.hasTask(tasks, TaskCreateDb)):
            # only if we don't create any more
            self.assertAtMostOneSuccess(tasks, TaskDropDb)
1055 1056 1057 1058 1059

        # TODO: restore the below, the problem exists, although unlikely in real-world
        # if (gSvcMgr!=None) and gSvcMgr.isRestarting():     
        # if (gSvcMgr == None) or (not gSvcMgr.isRestarting()) : 
        #     self.assertIfExistThenSuccess(tasks, TaskDropDb)       
1060

S
Shuduo Sang 已提交
1061

1062
class StateSuperTableOnly(AnyState):
1063 1064 1065 1066 1067 1068 1069 1070 1071
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1072
        if (self.hasSuccess(tasks, TaskDropSuperTable)
1073
                ):  # we are able to drop the table
1074
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
1075 1076
            # we must have had recreted it
            self.hasSuccess(tasks, TaskCreateSuperTable)
1077

1078
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
1079 1080
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
1081
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
1082 1083 1084
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
1085
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
1086 1087 1088
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
1089

S
Shuduo Sang 已提交
1090

1091 1092 1093 1094 1095 1096 1097 1098 1099 1100
class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1101
        if (newState.equals(AnyState.STATE_EMPTY)):
1102
            self.hasSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1103 1104 1105 1106
            if (not self.hasTask(tasks, TaskCreateDb)):
                self.assertAtMostOneSuccess(tasks, TaskDropDb)  # TODO: dicy
        elif (newState.equals(AnyState.STATE_DB_ONLY)):  # in DB only
            if (not self.hasTask(tasks, TaskCreateDb)
1107
                ):  # without a create_db task
S
Shuduo Sang 已提交
1108 1109
                # we must have drop_db task
                self.assertNoTask(tasks, TaskDropDb)
1110
            self.hasSuccess(tasks, TaskDropSuperTable)
1111
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
1112 1113 1114 1115
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
1116
            # self.hasSuccess(tasks, DeleteDataTasks)
S
Shuduo Sang 已提交
1117 1118
        else:  # should be STATE_HAS_DATA
            if (not self.hasTask(tasks, TaskCreateDb)
1119
                ):  # only if we didn't create one
S
Shuduo Sang 已提交
1120 1121 1122
                # we shouldn't have dropped it
                self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)
1123
                    ):  # if we didn't create the table
S
Shuduo Sang 已提交
1124 1125
                # we should not have a task that drops it
                self.assertNoTask(tasks, TaskDropSuperTable)
1126
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
1127

S
Shuduo Sang 已提交
1128

1129
class StateMechine:
1130 1131
    def __init__(self, dbConn):
        self._dbConn = dbConn
S
Shuduo Sang 已提交
1132 1133 1134
        self._curState = self._findCurrentState()  # starting state
        # transitition target probabilities, indexed with value of STATE_EMPTY,
        # STATE_DB_ONLY, etc.
1135
        self._stateWeights = [1, 2, 10, 40]
S
Shuduo Sang 已提交
1136

1137 1138 1139
    def getCurrentState(self):
        return self._curState

1140 1141 1142
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

1143
    # May be slow, use cautionsly...
S
Shuduo Sang 已提交
1144
    def getTaskTypes(self):  # those that can run (directly/indirectly) from the current state
1145 1146 1147 1148 1149 1150
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

S
Shuduo Sang 已提交
1151
        allTaskClasses = StateTransitionTask.__subclasses__()  # all state transition tasks
1152 1153
        firstTaskTypes = []
        for tc in allTaskClasses:
S
Shuduo Sang 已提交
1154
            # t = tc(self) # create task object
1155 1156
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
S
Shuduo Sang 已提交
1157 1158 1159 1160 1161 1162 1163 1164
        # now we have all the tasks that can begin directly from the current
        # state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy()  # have to have these
        for task1 in firstTaskTypes:  # each task type gathered so far
            endState = task1.getEndState()  # figure the end state
            if endState is None:  # does not change end state
                continue  # no use, do nothing
            for tc in allTaskClasses:  # what task can further begin from there?
1165
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
S
Shuduo Sang 已提交
1166
                    taskTypes.append(tc)  # gather it
1167 1168

        if len(taskTypes) <= 0:
S
Shuduo Sang 已提交
1169 1170 1171 1172 1173 1174 1175
            raise RuntimeError(
                "No suitable task types found for state: {}".format(
                    self._curState))
        logger.debug(
            "[OPS] Tasks found for state {}: {}".format(
                self._curState,
                typesToStrings(taskTypes)))
1176 1177 1178 1179
        return taskTypes

    def _findCurrentState(self):
        dbc = self._dbConn
S
Shuduo Sang 已提交
1180
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
1181 1182
        if not dbc.hasDatabases():  # no database?!
            logger.debug( "[STT] empty database found, between {} and {}".format(ts, time.time()))
1183
            return StateEmpty()
S
Shuduo Sang 已提交
1184 1185
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
1186 1187 1188
        dbc.use("db")
        if not dbc.hasTables():  # no tables
            logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
1189
            return StateDbOnly()
1190 1191 1192 1193

        sTable = DbManager.getFixedSuperTable()
        if sTable.hasRegTables(dbc):  # no regular tables
            logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
1194
            return StateSuperTableOnly()
S
Shuduo Sang 已提交
1195
        else:  # has actual tables
1196
            logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
1197 1198 1199
            return StateHasData()

    def transition(self, tasks):
S
Shuduo Sang 已提交
1200
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
1201
            logger.debug("[STT] Starting State: {}".format(self._curState))
S
Shuduo Sang 已提交
1202
            return  # do nothing
1203

S
Shuduo Sang 已提交
1204 1205
        # this should show up in the server log, separating steps
        self._dbConn.execute("show dnodes")
1206 1207 1208 1209

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
1210 1211
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
            # case of multiple creation and drops
1212 1213

        if self._curState.canDropDb():
1214
            if gSvcMgr == None: # only if we are running as client-only
S
Steven Li 已提交
1215
                self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1216 1217
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
            # case of drop-create-drop
1218 1219 1220

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
S
Shuduo Sang 已提交
1221 1222
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not
            # really, in case of create-drop-create
1223 1224 1225

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
S
Shuduo Sang 已提交
1226 1227
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not
            # really in case of drop-create-drop
1228 1229

        # if self._state.canAddData():
S
Shuduo Sang 已提交
1230 1231
        # self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true
        # actually
1232 1233 1234 1235 1236 1237

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
S
Shuduo Sang 已提交
1238 1239
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
1240 1241 1242
        self._curState = newState

    def pickTaskType(self):
S
Shuduo Sang 已提交
1243 1244
        # all the task types we can choose from at curent state
        taskTypes = self.getTaskTypes()
1245 1246 1247
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
S
Shuduo Sang 已提交
1248 1249 1250
            if endState is not None:
                # TODO: change to a method
                weights.append(self._stateWeights[endState.getValIndex()])
1251
            else:
S
Shuduo Sang 已提交
1252 1253
                # read data task, default to 10: TODO: change to a constant
                weights.append(10)
1254
        i = self._weighted_choice_sub(weights)
S
Shuduo Sang 已提交
1255
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
1256 1257
        return taskTypes[i]

S
Shuduo Sang 已提交
1258 1259 1260 1261 1262
    # ref:
    # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
    def _weighted_choice_sub(self, weights):
        # TODO: use our dice to ensure it being determinstic?
        rnd = random.random() * sum(weights)
1263 1264 1265 1266
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
1267

1268
# Manager of the Database Data/Connection
S
Shuduo Sang 已提交
1269 1270 1271 1272


class DbManager():
    def __init__(self, resetDb=True):
S
Steven Li 已提交
1273
        self.tableNumQueue = LinearQueue()
S
Shuduo Sang 已提交
1274 1275 1276
        # datetime.datetime(2019, 1, 1) # initial date time tick
        self._lastTick = self.setupLastTick()
        self._lastInt = 0  # next one is initial integer
1277
        self._lock = threading.RLock()
S
Shuduo Sang 已提交
1278

1279
        # self.openDbServerConnection()
S
Shuduo Sang 已提交
1280 1281
        self._dbConn = DbConn.createNative() if (
            gConfig.connector_type == 'native') else DbConn.createRest()
1282
        try:
S
Shuduo Sang 已提交
1283
            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
1284 1285
        except taos.error.ProgrammingError as err:
            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
S
Shuduo Sang 已提交
1286 1287 1288
            if (err.msg == 'client disconnected'):  # cannot open DB connection
                print(
                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
S
Steven Li 已提交
1289
                sys.exit(2)
1290
            else:
S
Shuduo Sang 已提交
1291 1292
                raise
        except BaseException:
S
Steven Li 已提交
1293
            print("[=] Unexpected exception")
S
Shuduo Sang 已提交
1294 1295 1296 1297
            raise

        if resetDb:
            self._dbConn.resetDb()  # drop and recreate DB
1298

S
Shuduo Sang 已提交
1299 1300
        # Do this after dbConn is in proper shape
        self._stateMachine = StateMechine(self._dbConn)
1301

1302 1303 1304
    def getDbConn(self):
        return self._dbConn

S
Shuduo Sang 已提交
1305
    def getStateMachine(self) -> StateMechine:
1306 1307 1308 1309
        return self._stateMachine

    # def getState(self):
    #     return self._stateMachine.getCurrentState()
1310 1311 1312 1313 1314 1315

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
1316
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
1317
    def setupLastTick(self):
1318
        t1 = datetime.datetime(2020, 6, 1)
1319
        t2 = datetime.datetime.now()
S
Shuduo Sang 已提交
1320 1321 1322 1323
        # maybe a very large number, takes 69 years to exceed Python int range
        elSec = int(t2.timestamp() - t1.timestamp())
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * \
            500  # a number representing seconds within 10 years
1324
        # print("elSec = {}".format(elSec))
S
Shuduo Sang 已提交
1325 1326 1327
        t3 = datetime.datetime(2012, 1, 1)  # default "keep" is 10 years
        t4 = datetime.datetime.fromtimestamp(
            t3.timestamp() + elSec2)  # see explanation above
1328 1329 1330
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

S
Shuduo Sang 已提交
1331
    def pickAndAllocateTable(self):  # pick any table, and "use" it
S
Steven Li 已提交
1332 1333
        return self.tableNumQueue.pickAndAllocate()

1334 1335 1336 1337 1338
    def addTable(self):
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

1339 1340
    @classmethod
    def getFixedSuperTableName(cls):
1341
        return "fs_table"
1342

1343 1344 1345 1346
    @classmethod
    def getFixedSuperTable(cls):
        return TdSuperTable(cls.getFixedSuperTableName())

S
Shuduo Sang 已提交
1347
    def releaseTable(self, i):  # return the table back, so others can use it
S
Steven Li 已提交
1348 1349
        self.tableNumQueue.release(i)

1350
    def getNextTick(self):
S
Shuduo Sang 已提交
1351
        with self._lock:  # prevent duplicate tick
1352 1353
            if Dice.throw(20) == 0:  # 1 in 20 chance
                return self._lastTick + datetime.timedelta(0, -100) # Go back in time 100 seconds
S
Shuduo Sang 已提交
1354 1355 1356
            else:  # regular
                # add one second to it
                self._lastTick += datetime.timedelta(0, 1)
S
Steven Li 已提交
1357
                return self._lastTick
1358 1359

    def getNextInt(self):
1360 1361 1362
        with self._lock:
            self._lastInt += 1
            return self._lastInt
1363 1364

    def getNextBinary(self):
S
Shuduo Sang 已提交
1365 1366
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(
            self.getNextInt())
1367 1368

    def getNextFloat(self):
1369 1370 1371
        ret = 0.9 + self.getNextInt()
        # print("Float obtained: {}".format(ret))
        return ret
S
Shuduo Sang 已提交
1372

S
Steven Li 已提交
1373
    def getTableNameToDelete(self):
S
Shuduo Sang 已提交
1374 1375
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if (not tblNum):  # maybe false
1376
            return False
S
Shuduo Sang 已提交
1377

S
Steven Li 已提交
1378 1379
        return "table_{}".format(tblNum)

1380
    def cleanUp(self):
S
Shuduo Sang 已提交
1381 1382
        self._dbConn.close()

1383

1384
class TaskExecutor():
1385
    class BoundedList:
S
Shuduo Sang 已提交
1386
        def __init__(self, size=10):
1387 1388
            self._size = size
            self._list = []
S
Steven Li 已提交
1389
            self._lock = threading.Lock()
1390

S
Shuduo Sang 已提交
1391
        def add(self, n: int):
S
Steven Li 已提交
1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417
            with self._lock:
                if not self._list:  # empty
                    self._list.append(n)
                    return
                # now we should insert
                nItems = len(self._list)
                insPos = 0
                for i in range(nItems):
                    insPos = i
                    if n <= self._list[i]:  # smaller than this item, time to insert
                        break  # found the insertion point
                    insPos += 1  # insert to the right

                if insPos == 0:  # except for the 1st item, # TODO: elimiate first item as gating item
                    return  # do nothing

                # print("Inserting at postion {}, value: {}".format(insPos, n))
                self._list.insert(insPos, n)  # insert

                newLen = len(self._list)
                if newLen <= self._size:
                    return  # do nothing
                elif newLen == (self._size + 1):
                    del self._list[0]  # remove the first item
                else:
                    raise RuntimeError("Corrupt Bounded List")
1418 1419 1420 1421 1422 1423

        def __str__(self):
            return repr(self._list)

    _boundedList = BoundedList()

1424 1425 1426
    def __init__(self, curStep):
        self._curStep = curStep

1427 1428 1429 1430
    @classmethod
    def getBoundedList(cls):
        return cls._boundedList

1431 1432 1433
    def getCurStep(self):
        return self._curStep

S
Shuduo Sang 已提交
1434
    def execute(self, task: Task, wt: WorkerThread):  # execute a task on a thread
1435
        task.execute(wt)
1436

1437 1438 1439 1440
    def recordDataMark(self, n: int):
        # print("[{}]".format(n), end="", flush=True)
        self._boundedList.add(n)

1441 1442
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
1443

1444 1445
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
1446

S
Shuduo Sang 已提交
1447

S
Steven Li 已提交
1448
class Task():
1449 1450 1451 1452
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Shuduo Sang 已提交
1453
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
S
Steven Li 已提交
1454 1455
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn
1456

S
Shuduo Sang 已提交
1457
    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
1458
        self._dbManager = dbManager
S
Shuduo Sang 已提交
1459
        self._workerThread = None
1460
        self._err = None
1461
        self._aborted = False
1462
        self._curStep = None
S
Shuduo Sang 已提交
1463
        self._numRows = None  # Number of rows affected
1464

S
Shuduo Sang 已提交
1465
        # Assign an incremental task serial number
1466
        self._taskNum = self.allocTaskNum()
S
Steven Li 已提交
1467
        # logger.debug("Creating new task {}...".format(self._taskNum))
1468

1469 1470
        self._execStats = execStats

1471
    def isSuccess(self):
S
Shuduo Sang 已提交
1472
        return self._err is None
1473

1474 1475 1476
    def isAborted(self):
        return self._aborted

S
Shuduo Sang 已提交
1477
    def clone(self):  # TODO: why do we need this again?
1478
        newTask = self.__class__(self._dbManager, self._execStats)
1479 1480 1481
        return newTask

    def logDebug(self, msg):
S
Shuduo Sang 已提交
1482 1483 1484
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1485 1486

    def logInfo(self, msg):
S
Shuduo Sang 已提交
1487 1488 1489
        self._workerThread.logInfo(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1490

1491
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1492 1493 1494
        raise RuntimeError(
            "To be implemeted by child classes, class name: {}".format(
                self.__class__.__name__))
1495

1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508
    def _isErrAcceptable(self, errno, msg):
        if errno in [
                0x05,  # TSDB_CODE_RPC_NOT_READY
                # 0x200, # invalid SQL, TODO: re-examine with TD-934
                0x360, 0x362, 
                0x369, # tag already exists
                0x36A, 0x36B, 0x36D,
                0x381, 
                0x380, # "db not selected"
                0x383,
                0x386,  # DB is being dropped?!
                0x503,
                0x510,  # vnode not in ready state
1509
                0x14,   # db not ready, errno changed
1510 1511 1512 1513
                0x600,
                1000  # REST catch-all error
            ]: 
            return True # These are the ALWAYS-ACCEPTABLE ones
1514 1515
        elif (errno in [ 0x0B ]) and gConfig.auto_start_service:
            return True # We may get "network unavilable" when restarting service
1516 1517 1518
        elif errno == 0x200 : # invalid SQL, we need to div in a bit more
            if msg.find("invalid column name") != -1:
                return True 
1519 1520 1521 1522
            elif msg.find("tags number not matched") != -1: # mismatched tags after modification
                return True
            elif msg.find("duplicated column names") != -1: # also alter table tag issues
                return True
S
Steven Li 已提交
1523 1524 1525
        elif (gSvcMgr!=None) and gSvcMgr.isRestarting():
            logger.info("Ignoring error when service is restarting: errno = {}, msg = {}".format(errno, msg))
            return True
1526 1527 1528 1529
        
        return False # Not an acceptable error


1530 1531
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
S
Shuduo Sang 已提交
1532
        self._workerThread = wt  # type: ignore
1533 1534

        te = wt.getTaskExecutor()
1535
        self._curStep = te.getCurStep()
S
Shuduo Sang 已提交
1536 1537
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))
1538 1539

        self._err = None
1540 1541
        self._execStats.beginTaskType(self.__class__.__name__)  # mark beginning
        errno2 = None
1542
        try:
S
Shuduo Sang 已提交
1543
            self._executeInternal(te, wt)  # TODO: no return value?
1544
        except taos.error.ProgrammingError as err:
1545
            errno2 = Helper.convertErrno(err.errno)
1546
            if (gConfig.continue_on_exception):  # user choose to continue
1547
                self.logDebug("[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1548
                        errno2, err, wt.getDbConn().getLastSql()))
1549
                self._err = err
1550 1551
            elif self._isErrAcceptable(errno2, err.__str__()):
                self.logDebug("[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1552
                        errno2, err, wt.getDbConn().getLastSql()))
1553
                print("_", end="", flush=True)
S
Shuduo Sang 已提交
1554
                self._err = err
1555
            else: # not an acceptable error
1556 1557 1558
                errMsg = "[=] Unexpected Taos library exception ({}): errno=0x{:X}, msg: {}, SQL: {}".format(
                    self.__class__.__name__,
                    errno2, err, wt.getDbConn().getLastSql())
1559
                self.logDebug(errMsg)
S
Shuduo Sang 已提交
1560
                if gConfig.debug:
1561 1562
                    # raise # so that we see full stack
                    traceback.print_exc()
1563 1564
                print(
                    "\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
1565 1566 1567 1568
                    "----------------------------\n")
                # sys.exit(-1)
                self._err = err
                self._aborted = True
S
Shuduo Sang 已提交
1569
        except Exception as e:
S
Steven Li 已提交
1570
            self.logInfo("Non-TAOS exception encountered")
S
Shuduo Sang 已提交
1571
            self._err = e
S
Steven Li 已提交
1572
            self._aborted = True
1573
            traceback.print_exc()
1574
        except BaseException as e:
1575
            self.logInfo("Python base exception encountered")
1576
            self._err = e
1577
            self._aborted = True
S
Steven Li 已提交
1578
            traceback.print_exc()
S
Shuduo Sang 已提交
1579 1580 1581
        except BaseException:
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
1582
                    wt.getDbConn().getLastSql()))
1583
            raise
1584
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
S
Shuduo Sang 已提交
1585 1586 1587 1588

        self.logDebug("[X] task execution completed, {}, status: {}".format(
            self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        # TODO: merge with above.
1589
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess(), errno2)
S
Steven Li 已提交
1590

1591
    def execSql(self, sql):
1592
        return self._dbManager.execute(sql)
1593

S
Shuduo Sang 已提交
1594
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1595 1596
        return wt.execSql(sql)

S
Shuduo Sang 已提交
1597
    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1598 1599
        return wt.querySql(sql)

S
Shuduo Sang 已提交
1600
    def getQueryResult(self, wt: WorkerThread):  # execute an SQL on the worker thread
1601 1602 1603
        return wt.getQueryResult()


1604
class ExecutionStats:
1605
    def __init__(self):
S
Shuduo Sang 已提交
1606 1607
        # total/success times for a task
        self._execTimes: Dict[str, [int, int]] = {}
1608 1609 1610
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1611
        self._execStartTime = None
1612
        self._errors = {}
S
Shuduo Sang 已提交
1613 1614
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time
1615

1616 1617 1618
        self._failed = False
        self._failureReason = None

S
Steven Li 已提交
1619
    def __str__(self):
S
Shuduo Sang 已提交
1620 1621
        return "[ExecStats: _failed={}, _failureReason={}".format(
            self._failed, self._failureReason)
S
Steven Li 已提交
1622 1623

    def isFailed(self):
S
Shuduo Sang 已提交
1624
        return self._failed
S
Steven Li 已提交
1625

1626 1627 1628 1629 1630 1631
    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

1632
    def incExecCount(self, klassName, isSuccess, eno=None):  # TODO: add a lock here
1633 1634
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
S
Shuduo Sang 已提交
1635 1636
        t = self._execTimes[klassName]  # tuple for the data
        t[0] += 1  # index 0 has the "total" execution times
1637
        if isSuccess:
S
Shuduo Sang 已提交
1638
            t[1] += 1  # index 1 has the "success" execution times
1639 1640 1641 1642 1643
        if eno != None:             
            if klassName not in self._errors:
                self._errors[klassName] = {}
            errors = self._errors[klassName]
            errors[eno] = errors[eno]+1 if eno in errors else 1
1644 1645 1646

    def beginTaskType(self, klassName):
        with self._lock:
S
Shuduo Sang 已提交
1647 1648
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
1649 1650 1651 1652 1653
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
S
Shuduo Sang 已提交
1654
            if self._tasksInProgress == 0:  # all tasks have stopped
1655 1656 1657
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1658 1659 1660 1661
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1662
    def printStats(self):
S
Shuduo Sang 已提交
1663 1664 1665 1666 1667 1668
        logger.info(
            "----------------------------------------------------------------------")
        logger.info(
            "| Crash_Gen test {}, with the following stats:". format(
                "FAILED (reason: {})".format(
                    self._failureReason) if self._failed else "SUCCEEDED"))
1669
        logger.info("| Task Execution Times (success/total):")
1670
        execTimesAny = 0
S
Shuduo Sang 已提交
1671
        for k, n in self._execTimes.items():
1672
            execTimesAny += n[0]
1673 1674 1675 1676 1677 1678 1679 1680
            errStr = None
            if k in self._errors:
                errors = self._errors[k]
                # print("errors = {}".format(errors))
                errStrs = ["0x{:X}:{}".format(eno, n) for (eno, n) in errors.items()]
                # print("error strings = {}".format(errStrs))
                errStr = ", ".join(errStrs) 
            logger.info("|    {0:<24}: {1}/{2} (Errors: {3})".format(k, n[1], n[0], errStr))
S
Shuduo Sang 已提交
1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699

        logger.info(
            "| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info(
            "| Total Tasks In Progress at End: {}".format(
                self._tasksInProgress))
        logger.info(
            "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(
                self._accRunTime))
        logger.info(
            "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny))
        logger.info(
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
        logger.info(
            "| Top numbers written: {}".format(
                TaskExecutor.getBoundedList()))
        logger.info(
            "----------------------------------------------------------------------")
1700 1701 1702


class StateTransitionTask(Task):
1703 1704 1705 1706 1707
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1708
    @classmethod
S
Shuduo Sang 已提交
1709
    def getInfo(cls):  # each sub class should supply their own information
1710 1711
        raise RuntimeError("Overriding method expected")

S
Shuduo Sang 已提交
1712
    _endState = None
1713
    @classmethod
S
Shuduo Sang 已提交
1714
    def getEndState(cls):  # TODO: optimize by calling it fewer times
1715 1716
        raise RuntimeError("Overriding method expected")

1717 1718 1719
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1720

1721 1722 1723
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1724 1725

    @classmethod
1726 1727 1728
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1729

1730 1731
    @classmethod
    def getRegTableName(cls, i):
1732
        return "reg_table_{}".format(i)
1733

1734 1735
    def execute(self, wt: WorkerThread):
        super().execute(wt)
S
Shuduo Sang 已提交
1736 1737


1738
class TaskCreateDb(StateTransitionTask):
1739
    @classmethod
1740
    def getEndState(cls):
S
Shuduo Sang 已提交
1741
        return StateDbOnly()
1742

1743 1744 1745 1746
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1747
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1748 1749
        self.execWtSql(wt, "create database db")

1750

1751
class TaskDropDb(StateTransitionTask):
1752
    @classmethod
1753 1754
    def getEndState(cls):
        return StateEmpty()
1755

1756 1757 1758 1759
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1760
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1761
        self.execWtSql(wt, "drop database db")
S
Steven Li 已提交
1762
        logger.debug("[OPS] database dropped at {}".format(time.time()))
1763

S
Shuduo Sang 已提交
1764

1765
class TaskCreateSuperTable(StateTransitionTask):
1766
    @classmethod
1767 1768
    def getEndState(cls):
        return StateSuperTableOnly()
1769

1770 1771
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1772
        return state.canCreateFixedSuperTable()
1773

1774
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1775
        if not wt.dbInUse():  # no DB yet, to the best of our knowledge
1776 1777 1778
            logger.debug("Skipping task, no DB yet")
            return

1779
        sTable = self._dbManager.getFixedSuperTable()
1780
        # wt.execSql("use db")    # should always be in place
1781 1782
        sTable.create(wt.getDbConn(), {'ts':'timestamp', 'speed':'int'}, {'b':'binary(200)', 'f':'float'})
        # self.execWtSql(wt,"create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
S
Shuduo Sang 已提交
1783 1784
        # No need to create the regular tables, INSERT will do that
        # automatically
1785

S
Steven Li 已提交
1786

1787 1788 1789 1790
class TdSuperTable:
    def __init__(self, stName):
        self._stName = stName

1791 1792 1793
    def getName(self):
        return self._stName

1794 1795 1796 1797 1798 1799 1800 1801
    def create(self, dbc, cols: dict, tags: dict):
        sql = "CREATE TABLE db.{} ({}) TAGS ({})".format(
            self._stName,
            ",".join(['%s %s'%(k,v) for (k,v) in cols.items()]),
            ",".join(['%s %s'%(k,v) for (k,v) in tags.items()])
            )
        dbc.execute(sql)        

1802 1803 1804 1805
    def getRegTables(self, dbc: DbConn):
        try:
            dbc.query("select TBNAME from db.{}".format(self._stName))  # TODO: analyze result set later            
        except taos.error.ProgrammingError as err:                    
1806
            errno2 = Helper.convertErrno(err.errno) 
1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819
            logger.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err))
            raise

        qr = dbc.getQueryResult()
        return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation

    def hasRegTables(self, dbc: DbConn):
        return dbc.query("SELECT * FROM db.{}".format(self._stName)) > 0

    def ensureTable(self, dbc: DbConn, regTableName: str):
        sql = "select tbname from {} where tbname in ('{}')".format(self._stName, regTableName)
        if dbc.query(sql) >= 1 : # reg table exists already
            return
1820 1821
        sql = "CREATE TABLE {} USING {} tags ({})".format(
            regTableName, self._stName, self._getTagStrForSql(dbc)
1822 1823 1824
        )
        dbc.execute(sql)

1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869
    def _getTagStrForSql(self, dbc) :
        tags = self._getTags(dbc)
        tagStrs = []
        for tagName in tags: 
            tagType = tags[tagName]
            if tagType == 'BINARY':
                tagStrs.append("'Beijing-Shanghai-LosAngeles'")
            elif tagType == 'FLOAT':
                tagStrs.append('9.9')
            elif tagType == 'INT':
                tagStrs.append('88')
            else:
                raise RuntimeError("Unexpected tag type: {}".format(tagType))
        return ", ".join(tagStrs)

    def _getTags(self, dbc) -> dict:
        dbc.query("DESCRIBE {}".format(self._stName))
        stCols = dbc.getQueryResult()
        # print(stCols)
        ret = {row[0]:row[1] for row in stCols if row[3]=='TAG'} # name:type
        # print("Tags retrieved: {}".format(ret))
        return ret

    def addTag(self, dbc, tagName, tagType):
        if tagName in self._getTags(dbc): # already 
            return
        # sTable.addTag("extraTag", "int")
        sql = "alter table db.{} add tag {} {}".format(self._stName, tagName, tagType)
        dbc.execute(sql)

    def dropTag(self, dbc, tagName):
        if not tagName in self._getTags(dbc): # don't have this tag
            return
        sql = "alter table db.{} drop tag {}".format(self._stName, tagName)
        dbc.execute(sql)

    def changeTag(self, dbc, oldTag, newTag):
        tags = self._getTags(dbc)
        if not oldTag in tags: # don't have this tag
            return
        if newTag in tags: # already have this tag
            return
        sql = "alter table db.{} change tag {} {}".format(self._stName, oldTag, newTag)
        dbc.execute(sql)

1870
class TaskReadData(StateTransitionTask):
1871
    @classmethod
1872
    def getEndState(cls):
S
Shuduo Sang 已提交
1873
        return None  # meaning doesn't affect state
1874

1875 1876 1877 1878
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1879
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1880
        sTable = self._dbManager.getFixedSuperTable()
1881

S
Shuduo Sang 已提交
1882 1883
        if random.randrange(
                5) == 0:  # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
1884 1885
            wt.getDbConn().close()
            wt.getDbConn().open()
1886
        
1887 1888 1889 1890 1891 1892
        dbc = wt.getDbConn()
        for rTbName in sTable.getRegTables(dbc):  # regular tables
            aggExpr = Dice.choice([
                '*', 
                'count(*)', 
                'avg(speed)', 
1893
                # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable
1894 1895 1896 1897 1898 1899 1900 1901 1902
                'sum(speed)', 
                'stddev(speed)', 
                'min(speed)', 
                'max(speed)', 
                'first(speed)', 
                'last(speed)']) # TODO: add more from 'top'
            filterExpr = Dice.choice([ # TODO: add various kind of WHERE conditions
                None
            ])
1903
            try:
1904 1905 1906
                dbc.execute("select {} from db.{}".format(aggExpr, rTbName))
                if aggExpr not in ['stddev(speed)']: #TODO: STDDEV not valid for super tables?!
                    dbc.execute("select {} from db.{}".format(aggExpr, sTable.getName()))
1907
            except taos.error.ProgrammingError as err:                    
1908
                errno2 = Helper.convertErrno(err.errno)
1909
                logger.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, dbc.getLastSql()))
1910
                raise
S
Shuduo Sang 已提交
1911

1912
class TaskDropSuperTable(StateTransitionTask):
1913
    @classmethod
1914
    def getEndState(cls):
S
Shuduo Sang 已提交
1915
        return StateDbOnly()
1916

1917 1918
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1919
        return state.canDropFixedSuperTable()
1920

1921
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1922 1923 1924 1925 1926 1927 1928
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized
        # sequence
        if Dice.throw(2) == 0:
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
1929
            isSuccess = True
S
Shuduo Sang 已提交
1930 1931 1932
            for i in tblSeq:
                regTableName = self.getRegTableName(
                    i)  # "db.reg_table_{}".format(i)
1933
                try:
S
Shuduo Sang 已提交
1934 1935 1936
                    self.execWtSql(wt, "drop table {}".format(
                        regTableName))  # nRows always 0, like MySQL
                except taos.error.ProgrammingError as err:
1937 1938
                    # correcting for strange error number scheme                    
                    errno2 = Helper.convertErrno(err.errno)
S
Shuduo Sang 已提交
1939
                    if (errno2 in [0x362]):  # mnode invalid table name
1940
                        isSuccess = False
S
Shuduo Sang 已提交
1941 1942 1943
                        logger.debug(
                            "[DB] Acceptable error when dropping a table")
                    continue  # try to delete next regular table
1944 1945

                if (not tickOutput):
S
Shuduo Sang 已提交
1946 1947
                    tickOutput = True  # Print only one time
                    if isSuccess:
1948 1949
                        print("d", end="", flush=True)
                    else:
S
Shuduo Sang 已提交
1950
                        print("f", end="", flush=True)
1951 1952

        # Drop the super table itself
S
Shuduo Sang 已提交
1953
        tblName = self._dbManager.getFixedSuperTableName()
1954
        self.execWtSql(wt, "drop table db.{}".format(tblName))
1955

S
Shuduo Sang 已提交
1956

1957 1958 1959
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
S
Shuduo Sang 已提交
1960
        return None  # meaning doesn't affect state
1961 1962 1963

    @classmethod
    def canBeginFrom(cls, state: AnyState):
S
Shuduo Sang 已提交
1964
        return state.canDropFixedSuperTable()  # if we can drop it, we can alter tags
1965 1966

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1967 1968 1969
        # tblName = self._dbManager.getFixedSuperTableName()
        dbc = wt.getDbConn()
        sTable = self._dbManager.getFixedSuperTable()
1970
        dice = Dice.throw(4)
S
Shuduo Sang 已提交
1971
        if dice == 0:
1972 1973
            sTable.addTag(dbc, "extraTag", "int")
            # sql = "alter table db.{} add tag extraTag int".format(tblName)
S
Shuduo Sang 已提交
1974
        elif dice == 1:
1975 1976
            sTable.dropTag(dbc, "extraTag")
            # sql = "alter table db.{} drop tag extraTag".format(tblName)
S
Shuduo Sang 已提交
1977
        elif dice == 2:
1978 1979
            sTable.dropTag(dbc, "newTag")
            # sql = "alter table db.{} drop tag newTag".format(tblName)
S
Shuduo Sang 已提交
1980
        else:  # dice == 3
1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997
            sTable.changeTag(dbc, "extraTag", "newTag")
            # sql = "alter table db.{} change tag extraTag newTag".format(tblName)

class TaskRestartService(StateTransitionTask):
    _isRunning = False
    _classLock = threading.Lock()

    @classmethod
    def getEndState(cls):
        return None  # meaning doesn't affect state

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        if gConfig.auto_start_service:
            return state.canDropFixedSuperTable()  # Basicallly when we have the super table
        return False # don't run this otherwise

1998
    CHANCE_TO_RESTART_SERVICE = 100
1999 2000 2001 2002
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        if not gConfig.auto_start_service: # only execute when we are in -a mode
            print("_a", end="", flush=True)
            return
2003

2004 2005 2006 2007 2008 2009
        with self._classLock:
            if self._isRunning:
                print("Skipping restart task, another running already")
                return
            self._isRunning = True

2010
        if Dice.throw(self.CHANCE_TO_RESTART_SERVICE) == 0: # 1 in N chance
2011 2012 2013
            dbc = wt.getDbConn()
            dbc.execute("show databases") # simple delay, align timing with other workers
            gSvcMgr.restart()
2014

2015
        self._isRunning = False
S
Shuduo Sang 已提交
2016

2017
class TaskAddData(StateTransitionTask):
S
Shuduo Sang 已提交
2018 2019
    # Track which table is being actively worked on
    activeTable: Set[int] = set()
2020

S
Shuduo Sang 已提交
2021 2022
    # We use these two files to record operations to DB, useful for power-off
    # tests
2023 2024 2025 2026 2027
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
S
Shuduo Sang 已提交
2028 2029 2030 2031
        if gConfig.record_ops:
            if (cls.fAddLogReady is None):
                logger.info(
                    "Recording in a file operations to be performed...")
2032
                cls.fAddLogReady = open("add_log_ready.txt", "w")
S
Shuduo Sang 已提交
2033
            if (cls.fAddLogDone is None):
2034 2035
                logger.info("Recording in a file operations completed...")
                cls.fAddLogDone = open("add_log_done.txt", "w")
2036

2037
    @classmethod
2038 2039
    def getEndState(cls):
        return StateHasData()
2040 2041 2042 2043

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
S
Shuduo Sang 已提交
2044

2045
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
2046
        ds = self._dbManager # Quite DANGEROUS here, may result in multi-thread client access
2047
        tblSeq = list(range(
S
Shuduo Sang 已提交
2048 2049 2050 2051
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
        for i in tblSeq:
            if (i in self.activeTable):  # wow already active
2052
                print("x", end="", flush=True) # concurrent insertion
2053
            else:
S
Shuduo Sang 已提交
2054
                self.activeTable.add(i)  # marking it active
2055 2056 2057
            
            sTable = ds.getFixedSuperTable()
            regTableName = self.getRegTableName(i)  # "db.reg_table_{}".format(i)
2058
            sTable.ensureTable(wt.getDbConn(), regTableName)  # Ensure the table exists           
2059 2060
           
            for j in range(self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
S
Shuduo Sang 已提交
2061
                nextInt = ds.getNextInt()
2062 2063
                if gConfig.record_ops:
                    self.prepToRecordOps()
2064
                    self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName))
2065 2066
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
2067
                sql = "insert into {} values ('{}', {});".format( # removed: tags ('{}', {})
S
Shuduo Sang 已提交
2068
                    regTableName,
2069 2070
                    # ds.getFixedSuperTableName(),
                    # ds.getNextBinary(), ds.getNextFloat(),
2071
                    ds.getNextTick(), nextInt)
S
Shuduo Sang 已提交
2072 2073 2074
                self.execWtSql(wt, sql)
                # Successfully wrote the data into the DB, let's record it
                # somehow
2075
                te.recordDataMark(nextInt)
2076
                if gConfig.record_ops:
S
Shuduo Sang 已提交
2077 2078 2079
                    self.fAddLogDone.write(
                        "Wrote {} to {}\n".format(
                            nextInt, regTableName))
2080 2081
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
S
Shuduo Sang 已提交
2082
            self.activeTable.discard(i)  # not raising an error, unlike remove
2083 2084


S
Steven Li 已提交
2085 2086
# Deterministic random number generator
class Dice():
S
Shuduo Sang 已提交
2087
    seeded = False  # static, uninitialized
S
Steven Li 已提交
2088 2089

    @classmethod
S
Shuduo Sang 已提交
2090
    def seed(cls, s):  # static
S
Steven Li 已提交
2091
        if (cls.seeded):
S
Shuduo Sang 已提交
2092 2093
            raise RuntimeError(
                "Cannot seed the random generator more than once")
S
Steven Li 已提交
2094 2095 2096 2097 2098
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
S
Shuduo Sang 已提交
2099
    def verifyRNG(cls):  # Verify that the RNG is determinstic
S
Steven Li 已提交
2100 2101 2102 2103
        random.seed(0)
        x1 = random.randrange(0, 1000)
        x2 = random.randrange(0, 1000)
        x3 = random.randrange(0, 1000)
S
Shuduo Sang 已提交
2104
        if (x1 != 864 or x2 != 394 or x3 != 776):
S
Steven Li 已提交
2105 2106 2107
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
S
Shuduo Sang 已提交
2108
    def throw(cls, stop):  # get 0 to stop-1
2109
        return cls.throwRange(0, stop)
S
Steven Li 已提交
2110 2111

    @classmethod
S
Shuduo Sang 已提交
2112 2113
    def throwRange(cls, start, stop):  # up to stop-1
        if (not cls.seeded):
S
Steven Li 已提交
2114
            raise RuntimeError("Cannot throw dice before seeding it")
2115
        return random.randrange(start, stop)
S
Steven Li 已提交
2116

2117 2118 2119 2120
    @classmethod
    def choice(cls, cList):
        return random.choice(cList)

S
Steven Li 已提交
2121

S
Steven Li 已提交
2122 2123
class LoggingFilter(logging.Filter):
    def filter(self, record: logging.LogRecord):
S
Shuduo Sang 已提交
2124 2125
        if (record.levelno >= logging.INFO):
            return True  # info or above always log
S
Steven Li 已提交
2126

S
Steven Li 已提交
2127 2128 2129 2130
        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
S
Steven Li 已提交
2131 2132
        return True

S
Shuduo Sang 已提交
2133 2134

class MyLoggingAdapter(logging.LoggerAdapter):
2135 2136 2137 2138
    def process(self, msg, kwargs):
        return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
        # return '[%s] %s' % (self.extra['connid'], msg), kwargs

S
Shuduo Sang 已提交
2139 2140

class SvcManager:
2141
    def __init__(self):
2142
        print("Starting TDengine Service Manager")
2143 2144 2145
        # signal.signal(signal.SIGTERM, self.sigIntHandler) # Moved to MainExec
        # signal.signal(signal.SIGINT, self.sigIntHandler)
        # signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
2146

2147
        self.inSigHandler = False
2148 2149
        # self._status = MainExec.STATUS_RUNNING # set inside
        # _startTaosService()
2150
        self.svcMgrThread = None
2151 2152
        self._lock = threading.Lock()
        self._isRestarting = False
2153

2154 2155 2156 2157 2158 2159 2160 2161
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Service Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Restart")
            # Remember to update the if range below
S
Shuduo Sang 已提交
2162
            # print("Enter Choice: ", end="", flush=True)
2163 2164 2165
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
S
Shuduo Sang 已提交
2166 2167 2168
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
2169
            print("Invalid choice, please try again.")
S
Shuduo Sang 已提交
2170
            choice = ""  # reset
2171 2172
        return choice

S
Shuduo Sang 已提交
2173
    def sigUsrHandler(self, signalNumber, frame):
2174
        print("Interrupting main thread execution upon SIGUSR1")
2175
        if self.inSigHandler:  # already
2176
            print("Ignoring repeated SIG...")
S
Shuduo Sang 已提交
2177
            return  # do nothing if it's already not running
2178
        self.inSigHandler = True
2179 2180

        choice = self._doMenu()
S
Shuduo Sang 已提交
2181 2182 2183 2184 2185
        if choice == "1":
            # TODO: can the sub-process be blocked due to us not reading from
            # queue?
            self.sigHandlerResume()
        elif choice == "2":
2186
            self.stopTaosService()
2187 2188
        elif choice == "3": # Restart
            self.restart()
2189 2190
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))
2191

2192 2193
        self.inSigHandler = False

2194
    def sigIntHandler(self, signalNumber, frame):
2195
        print("SvcManager: INT Signal Handler starting...")
2196
        if self.inSigHandler:
2197 2198
            print("Ignoring repeated SIG_INT...")
            return
2199
        self.inSigHandler = True
2200

S
Shuduo Sang 已提交
2201
        self.stopTaosService()
2202
        print("SvcManager: INT Signal Handler returning...")
2203
        self.inSigHandler = False
2204

S
Shuduo Sang 已提交
2205
    def sigHandlerResume(self):
2206
        print("Resuming TDengine service manager thread (main thread)...\n\n")
2207

2208
    def _checkServiceManagerThread(self):
2209 2210 2211 2212
        if self.svcMgrThread:  # valid svc mgr thread
            if self.svcMgrThread.isStopped():  # done?
                self.svcMgrThread.procIpcBatch()  # one last time. TODO: appropriate?
                self.svcMgrThread = None  # no more
2213 2214

    def _procIpcAll(self):
2215 2216 2217 2218 2219 2220
        while self.isRunning() or self.isRestarting() :  # for as long as the svc mgr thread is still here
            if self.isRunning():
                self.svcMgrThread.procIpcBatch()  # regular processing,
                self._checkServiceManagerThread()
            elif self.isRetarting():
                print("Service restarting...")
2221 2222 2223 2224 2225
            time.sleep(0.5)  # pause, before next round
        print(
            "Service Manager Thread (with subprocess) has ended, main thread now exiting...")

    def startTaosService(self):
2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238
        with self._lock:
            if self.svcMgrThread:
                raise RuntimeError("Cannot start TAOS service when one may already be running")

            # Find if there's already a taosd service, and then kill it
            for proc in psutil.process_iter():
                if proc.name() == 'taosd':
                    print("Killing an existing TAOSD process in 2 seconds... press CTRL-C to interrupe")
                    time.sleep(2.0)
                    proc.kill()
                # print("Process: {}".format(proc.name()))
            
            self.svcMgrThread = ServiceManagerThread()  # create the object
S
Steven Li 已提交
2239
            print("Attempting to start TAOS service started, printing out output...")
2240
            self.svcMgrThread.start()            
2241 2242 2243 2244
            self.svcMgrThread.procIpcBatch(
                trimToTarget=10,
                forceOutput=True)  # for printing 10 lines
            print("TAOS service started")
2245 2246

    def stopTaosService(self, outputLines=20):
2247 2248 2249 2250
        with self._lock:
            if not self.isRunning():
                logger.warning("Cannot stop TAOS service, not running")
                return
2251

2252 2253 2254 2255 2256
            print("Terminating Service Manager Thread (SMT) execution...")
            self.svcMgrThread.stop()
            if self.svcMgrThread.isStopped():
                self.svcMgrThread.procIpcBatch(outputLines)  # one last time
                self.svcMgrThread = None
2257 2258
                print("End of TDengine Service Output")
                print("----- TDengine Service (managed by SMT) is now terminated -----\n")
2259 2260
            else:
                print("WARNING: SMT did not terminate as expected")
2261 2262 2263

    def run(self):
        self.startTaosService()
2264
        self._procIpcAll()  # pump/process all the messages, may encounter SIG + restart
2265
        if self.isRunning():  # if sig handler hasn't destroyed it by now
2266 2267
            self.stopTaosService()  # should have started already

2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281
    def restart(self):
        if self._isRestarting:
            logger.warning("Cannot restart service when it's already restarting")
            return

        self._isRestarting = True
        if self.isRunning():
            self.stopTaosService()
        else:
            logger.warning("Service not running when restart requested")

        self.startTaosService()
        self._isRestarting = False

2282 2283
    def isRunning(self):
        return self.svcMgrThread != None
2284

2285 2286 2287
    def isRestarting(self):
        return self._isRestarting

2288 2289 2290 2291 2292
class ServiceManagerThread:
    MAX_QUEUE_SIZE = 10000

    def __init__(self):
        self._tdeSubProcess = None
2293
        self._thread = None
2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310
        self._status = None

    def getStatus(self):
        return self._status

    def isRunning(self):
        # return self._thread and self._thread.is_alive()
        return self._status == MainExec.STATUS_RUNNING

    def isStopping(self):
        return self._status == MainExec.STATUS_STOPPING

    def isStopped(self):
        return self._status == MainExec.STATUS_STOPPED

    # Start the thread (with sub process), and wait for the sub service
    # to become fully operational
2311 2312
    def start(self):
        if self._thread:
2313
            raise RuntimeError("Unexpected _thread")
2314
        if self._tdeSubProcess:
2315 2316 2317 2318
            raise RuntimeError("TDengine sub process already created/running")

        self._status = MainExec.STATUS_STARTING

2319
        self._tdeSubProcess = TdeSubProcess()
2320 2321 2322 2323
        self._tdeSubProcess.start()

        self._ipcQueue = Queue()
        self._thread = threading.Thread(
2324
            target=self.svcOutputReader,
2325
            args=(self._tdeSubProcess.getStdOut(), self._ipcQueue))
2326
        self._thread.daemon = True  # thread dies with the program
2327 2328
        self._thread.start()

2329 2330 2331 2332 2333 2334
        self._thread2 = threading.Thread(
            target=self.svcErrorReader,
            args=(self._tdeSubProcess.getStdErr(), self._ipcQueue))
        self._thread2.daemon = True  # thread dies with the program
        self._thread2.start()

2335
        # wait for service to start
2336
        for i in range(0, 10):
2337 2338 2339
            time.sleep(1.0)
            # self.procIpcBatch() # don't pump message during start up
            print("_zz_", end="", flush=True)
2340
            if self._status == MainExec.STATUS_RUNNING:
2341
                logger.info("[] TDengine service READY to process requests")
2342 2343
                return  # now we've started
        # TODO: handle this better?
2344
        self.procIpcBatch(20, True) # display output before cronking out, trim to last 20 msgs, force output
2345
        raise RuntimeError("TDengine service did not start successfully")
2346 2347 2348 2349 2350 2351

    def stop(self):
        # can be called from both main thread or signal handler
        print("Terminating TDengine service running as the sub process...")
        if self.isStopped():
            print("Service already stopped")
2352
            return
2353 2354 2355
        if self.isStopping():
            print("Service is already being stopped")
            return
2356 2357 2358 2359
        # Linux will send Control-C generated SIGINT to the TDengine process
        # already, ref:
        # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes
        if not self._tdeSubProcess:
2360
            raise RuntimeError("sub process object missing")
2361

2362
        self._status = MainExec.STATUS_STOPPING
2363 2364
        retCode = self._tdeSubProcess.stop()
        print("Attempted to stop sub process, got return code: {}".format(retCode))
2365 2366
        if (retCode==-11): # SGV
            logger.error("[[--ERROR--]]: TDengine service SEGV fault (check core file!)")
2367

2368
        if self._tdeSubProcess.isRunning():  # still running
2369 2370
            print("FAILED to stop sub process, it is still running... pid = {}".format(
                    self._tdeSubProcess.getPid()))
2371
        else:
2372 2373 2374
            self._tdeSubProcess = None  # not running any more
            self.join()  # stop the thread, change the status, etc.

2375 2376 2377
    def join(self):
        # TODO: sanity check
        if not self.isStopping():
2378 2379 2380
            raise RuntimeError(
                "Unexpected status when ending svc mgr thread: {}".format(
                    self._status))
2381

2382
        if self._thread:
2383
            self._thread.join()
2384
            self._thread = None
2385
            self._status = MainExec.STATUS_STOPPED
2386 2387 2388
            # STD ERR thread
            self._thread2.join()
            self._thread2 = None
S
Shuduo Sang 已提交
2389
        else:
2390
            print("Joining empty thread, doing nothing")
2391 2392 2393

    def _trimQueue(self, targetSize):
        if targetSize <= 0:
2394
            return  # do nothing
2395
        q = self._ipcQueue
2396
        if (q.qsize() <= targetSize):  # no need to trim
2397 2398 2399 2400
            return

        logger.debug("Triming IPC queue to target size: {}".format(targetSize))
        itemsToTrim = q.qsize() - targetSize
2401
        for i in range(0, itemsToTrim):
2402 2403 2404
            try:
                q.get_nowait()
            except Empty:
2405 2406
                break  # break out of for loop, no more trimming

2407
    TD_READY_MSG = "TDengine is initialized successfully"
S
Shuduo Sang 已提交
2408

2409 2410
    def procIpcBatch(self, trimToTarget=0, forceOutput=False):
        self._trimQueue(trimToTarget)  # trim if necessary
S
Shuduo Sang 已提交
2411 2412
        # Process all the output generated by the underlying sub process,
        # managed by IO thread
2413
        print("<", end="", flush=True)
S
Shuduo Sang 已提交
2414 2415
        while True:
            try:
2416
                line = self._ipcQueue.get_nowait()  # getting output at fast speed
2417
                self._printProgress("_o")
2418 2419 2420
            except Empty:
                # time.sleep(2.3) # wait only if there's no output
                # no more output
2421
                print(".>", end="", flush=True)
S
Shuduo Sang 已提交
2422
                return  # we are done with THIS BATCH
2423
            else:  # got line, printing out
2424 2425 2426 2427 2428 2429 2430
                if forceOutput:
                    logger.info(line)
                else:
                    logger.debug(line)
        print(">", end="", flush=True)

    _ProgressBars = ["--", "//", "||", "\\\\"]
2431

2432 2433
    def _printProgress(self, msg):  # TODO: assuming 2 chars
        print(msg, end="", flush=True)
2434 2435 2436
        pBar = self._ProgressBars[Dice.throw(4)]
        print(pBar, end="", flush=True)
        print('\b\b\b\b', end="", flush=True)
2437

2438 2439 2440
    def svcOutputReader(self, out: IO, queue):
        # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # print("This is the svcOutput Reader...")
2441
        # for line in out :
2442 2443 2444
        for line in iter(out.readline, b''):
            # print("Finished reading a line: {}".format(line))
            # print("Adding item to queue...")
2445 2446 2447 2448 2449
            try:
                line = line.decode("utf-8").rstrip()
            except UnicodeError:
                print("\nNon-UTF8 server output: {}\n".format(line))

2450 2451
            # This might block, and then causing "out" buffer to block
            queue.put(line)
2452 2453
            self._printProgress("_i")

2454 2455
            if self._status == MainExec.STATUS_STARTING:  # we are starting, let's see if we have started
                if line.find(self.TD_READY_MSG) != -1:  # found
S
Steven Li 已提交
2456
                    logger.info("Waiting for the service to become FULLY READY")
2457
                    time.sleep(5.0) # wait for the server to truly start. TODO: remove this
S
Steven Li 已提交
2458 2459
                    logger.info("Service is now FULLY READY")   
                    self._status = MainExec.STATUS_RUNNING                 
2460 2461

            # Trim the queue if necessary: TODO: try this 1 out of 10 times
2462
            self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10)  # trim to 90% size
2463

2464 2465 2466
            if self.isStopping():  # TODO: use thread status instead
                # WAITING for stopping sub process to finish its outptu
                print("_w", end="", flush=True)
2467 2468

            # queue.put(line)
2469 2470
        # meaning sub process must have died
        print("\nNo more output from IO thread managing TDengine service")
2471 2472
        out.close()

2473 2474 2475 2476
    def svcErrorReader(self, err: IO, queue):
        for line in iter(err.readline, b''):
            print("\nTD Svc STDERR: {}".format(line))

2477 2478

class TdeSubProcess:
2479 2480 2481 2482 2483
    def __init__(self):
        self.subProcess = None

    def getStdOut(self):
        return self.subProcess.stdout
2484

2485 2486 2487
    def getStdErr(self):
        return self.subProcess.stderr

2488
    def isRunning(self):
2489
        return self.subProcess is not None
2490

2491 2492 2493
    def getPid(self):
        return self.subProcess.pid

S
Shuduo Sang 已提交
2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath
2508

2509
    def start(self):
2510
        ON_POSIX = 'posix' in sys.builtin_module_names
S
Shuduo Sang 已提交
2511

2512 2513 2514
        taosdPath = self.getBuildPath() + "/build/bin/taosd"
        cfgPath = self.getBuildPath() + "/test/cfg"

2515 2516 2517
        # Delete the log files
        logPath = self.getBuildPath() + "/test/log"
        # ref: https://stackoverflow.com/questions/1995373/deleting-all-files-in-a-directory-with-python/1995397
2518 2519 2520 2521
        # filelist = [ f for f in os.listdir(logPath) ] # if f.endswith(".bak") ]
        # for f in filelist:
        #     filePath = os.path.join(logPath, f)
        #     print("Removing log file: {}".format(filePath))
2522 2523 2524 2525 2526 2527
        #     os.remove(filePath)        
        if os.path.exists(logPath):
            logPathSaved = logPath + "_" + time.strftime('%Y-%m-%d-%H-%M-%S')
            logger.info("Saving old log files to: {}".format(logPathSaved))
            os.rename(logPath, logPathSaved)
        # os.mkdir(logPath) # recreate, no need actually, TDengine will auto-create with proper perms
2528
            
S
Shuduo Sang 已提交
2529
        svcCmd = [taosdPath, '-c', cfgPath]
2530
        # svcCmdSingle = "{} -c {}".format(taosdPath, cfgPath)
2531
        # svcCmd = ['vmstat', '1']
S
Shuduo Sang 已提交
2532
        if self.subProcess:  # already there
2533 2534
            raise RuntimeError("Corrupt process state")

S
Steven Li 已提交
2535
        # print("Starting service: {}".format(svcCmd))
2536
        self.subProcess = subprocess.Popen(
2537 2538
            svcCmd, shell=False,
            # svcCmdSingle, shell=True, # capture core dump?
S
Shuduo Sang 已提交
2539
            stdout=subprocess.PIPE,
2540
            stderr=subprocess.PIPE,
2541
            # bufsize=1, # not supported in binary mode
S
Steven Li 已提交
2542 2543
            close_fds=ON_POSIX
            )  # had text=True, which interferred with reading EOF
2544

2545 2546 2547
    def stop(self):
        if not self.subProcess:
            print("Sub process already stopped")
2548
            return -1
2549

2550
        retCode = self.subProcess.poll() # contains real sub process return code
S
Shuduo Sang 已提交
2551
        if retCode:  # valid return code, process ended
2552
            self.subProcess = None
S
Shuduo Sang 已提交
2553 2554
        else:  # process still alive, let's interrupt it
            print(
2555
                "Sub process is running, sending SIG_INT and waiting for it to terminate...")
S
Shuduo Sang 已提交
2556 2557 2558 2559
            # sub process should end, then IPC queue should end, causing IO
            # thread to end
            self.subProcess.send_signal(signal.SIGINT)
            try:
2560
                self.subProcess.wait(10)
2561
                retCode = self.subProcess.returncode
2562 2563
            except subprocess.TimeoutExpired as err:
                print("Time out waiting for TDengine service process to exit")
2564
                retCode = -3
2565
            else:
2566
                print("TDengine service process terminated successfully from SIG_INT")
2567
                retCode = -4
2568
                self.subProcess = None
2569
        return retCode
2570

2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598
class ThreadStacks: # stack info for all threads
    def __init__(self):
        self._allStacks = {}
        allFrames = sys._current_frames()
        for th in threading.enumerate():                        
            stack = traceback.extract_stack(allFrames[th.ident])     
            self._allStacks[th.native_id] = stack

    def print(self, filteredEndName = None, filterInternal = False):
        for thNid, stack in self._allStacks.items(): # for each thread            
            lastFrame = stack[-1]
            if filteredEndName: # we need to filter out stacks that match this name                
                if lastFrame.name == filteredEndName : # end did not match
                    continue
            if filterInternal:
                if lastFrame.name in ['wait', 'invoke_excepthook', 
                    '_wait', # The Barrier exception
                    'svcOutputReader', # the svcMgr thread
                    '__init__']: # the thread that extracted the stack
                    continue # ignore
            # Now print
            print("\n<----- Thread Info for ID: {}".format(thNid))
            for frame in stack:
                # print(frame)
                print("File {filename}, line {lineno}, in {name}".format(
                    filename=frame.filename, lineno=frame.lineno, name=frame.name))
                print("    {}".format(frame.line))
            print("-----> End of Thread Info\n")
S
Shuduo Sang 已提交
2599

2600 2601 2602
class ClientManager:
    def __init__(self):
        print("Starting service manager")
2603 2604
        # signal.signal(signal.SIGTERM, self.sigIntHandler)
        # signal.signal(signal.SIGINT, self.sigIntHandler)
2605

2606
        self._status = MainExec.STATUS_RUNNING
2607 2608
        self.tc = None

2609 2610
        self.inSigHandler = False

2611
    def sigIntHandler(self, signalNumber, frame):
2612
        if self._status != MainExec.STATUS_RUNNING:
2613 2614 2615
            print("Repeated SIGINT received, forced exit...")
            # return  # do nothing if it's already not running
            sys.exit(-1)
2616
        self._status = MainExec.STATUS_STOPPING  # immediately set our status
2617

2618
        print("ClientManager: Terminating program...")
2619 2620
        self.tc.requestToStop()

2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Client Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Show Threads")
            # Remember to update the if range below
            # print("Enter Choice: ", end="", flush=True)
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
            print("Invalid choice, please try again.")
            choice = ""  # reset
        return choice

    def sigUsrHandler(self, signalNumber, frame):
        print("Interrupting main thread execution upon SIGUSR1")
        if self.inSigHandler:  # already
            print("Ignoring repeated SIG_USR1...")
            return  # do nothing if it's already not running
        self.inSigHandler = True

        choice = self._doMenu()
        if choice == "1":
            print("Resuming execution...")
            time.sleep(1.0)
        elif choice == "2":
            print("Not implemented yet")
            time.sleep(1.0)
        elif choice == "3":
            ts = ThreadStacks()
            ts.print()
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))

        self.inSigHandler = False

S
Shuduo Sang 已提交
2662
    def _printLastNumbers(self):  # to verify data durability
2663 2664
        dbManager = DbManager(resetDb=False)
        dbc = dbManager.getDbConn()
2665
        if dbc.query("show databases") <= 1:  # no database (we have a default called "log")
2666
            return
2667
        dbc.execute("use db")
S
Shuduo Sang 已提交
2668
        if dbc.query("show tables") == 0:  # no tables
S
Steven Li 已提交
2669
            return
2670

S
Shuduo Sang 已提交
2671
        sTbName = dbManager.getFixedSuperTableName()
2672 2673

        # get all regular tables
S
Shuduo Sang 已提交
2674 2675
        # TODO: analyze result set later
        dbc.query("select TBNAME from db.{}".format(sTbName))
2676 2677 2678
        rTables = dbc.getQueryResult()

        bList = TaskExecutor.BoundedList()
S
Shuduo Sang 已提交
2679
        for rTbName in rTables:  # regular tables
2680 2681
            dbc.query("select speed from db.{}".format(rTbName[0]))
            numbers = dbc.getQueryResult()
S
Shuduo Sang 已提交
2682
            for row in numbers:
2683 2684 2685 2686 2687 2688
                # print("<{}>".format(n), end="", flush=True)
                bList.add(row[0])

        print("Top numbers in DB right now: {}".format(bList))
        print("TDengine client execution is about to start in 2 seconds...")
        time.sleep(2.0)
S
Shuduo Sang 已提交
2689
        dbManager = None  # release?
2690 2691 2692 2693

    def prepare(self):
        self._printLastNumbers()

2694
    def run(self, svcMgr):    
2695 2696
        self._printLastNumbers()

S
Shuduo Sang 已提交
2697
        dbManager = DbManager()  # Regular function
2698
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
2699
        self.tc = ThreadCoordinator(thPool, dbManager)
2700
        
2701
        self.tc.run()
S
Steven Li 已提交
2702 2703
        # print("exec stats: {}".format(self.tc.getExecStats()))
        # print("TC failed = {}".format(self.tc.isFailed()))
2704
        if svcMgr: # gConfig.auto_start_service:
2705
            svcMgr.stopTaosService()
2706 2707
        # Print exec status, etc., AFTER showing messages from the server
        self.conclude()
S
Steven Li 已提交
2708
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2709 2710
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
        return 1 if self.tc.isFailed() else 0
2711 2712 2713

    def conclude(self):
        self.tc.printStats()
S
Shuduo Sang 已提交
2714
        self.tc.getDbManager().cleanUp()
2715 2716

class MainExec:
2717 2718 2719
    STATUS_STARTING = 1
    STATUS_RUNNING = 2
    STATUS_STOPPING = 3
2720
    STATUS_STOPPED = 4
2721

2722 2723 2724
    def __init__(self):        
        self._clientMgr = None
        self._svcMgr = None
2725

2726 2727 2728
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT,  self.sigIntHandler)
        signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
2729

2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742
    def sigUsrHandler(self, signalNumber, frame):
        if self._clientMgr:
            self._clientMgr.sigUsrHandler(signalNumber, frame)
        elif self._svcMgr: # Only if no client mgr, we are running alone
            self._svcMgr.sigUsrHandler(signalNumber, frame)
        
    def sigIntHandler(self, signalNumber, frame):
        if self._svcMgr:
            self._svcMgr.sigIntHandler(signalNumber, frame)
        if self._clientMgr:
            self._clientMgr.sigIntHandler(signalNumber, frame)

    def runClient(self):
2743
        global gSvcMgr
2744 2745
        if gConfig.auto_start_service:
            self._svcMgr = SvcManager()
2746
            gSvcMgr = self._svcMgr # hack alert
2747 2748 2749
            self._svcMgr.startTaosService() # we start, don't run
        
        self._clientMgr = ClientManager()
2750 2751 2752 2753 2754 2755 2756
        ret = None
        try: 
            ret = self._clientMgr.run(self._svcMgr) # stop TAOS service inside
        except requests.exceptions.ConnectionError as err:
            logger.warning("Failed to open REST connection to DB")
            # don't raise
        return ret
2757 2758

    def runService(self):
2759
        global gSvcMgr
2760
        self._svcMgr = SvcManager()
2761 2762
        gSvcMgr = self._svcMgr # save it in a global variable TODO: hack alert

2763
        self._svcMgr.run() # run to some end state
2764 2765
        self._svcMgr = None 
        gSvcMgr = None        
2766 2767

    def runTemp(self):  # for debugging purposes
2768 2769
        # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
S
Shuduo Sang 已提交
2770
        # sTbName = dbState.getFixedSuperTableName()
2771 2772
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
S
Shuduo Sang 已提交
2773
        #     dbc.execute("use db")
2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))
S
Shuduo Sang 已提交
2785

2786 2787 2788 2789 2790 2791 2792 2793
        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
S
Shuduo Sang 已提交
2794
        #                 rTbName[0],
2795 2796
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
S
Shuduo Sang 已提交
2797
        #         for rTbName in rTables : # regular tables
2798
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
S
Shuduo Sang 已提交
2799
        #         logger.info("Initial READING operation is successful")
2800
        #     except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
2801 2802
        #         logger.info("Initial WRITE/READ error: {}".format(err))

2803 2804 2805
        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
S
Shuduo Sang 已提交
2806
        #     rows = dbc.query("show databases")
2807
        #     print("Rows: {}, time={}".format(rows, time.time()))
S
Shuduo Sang 已提交
2808 2809
        return

S
Steven Li 已提交
2810

2811
def main():
S
Shuduo Sang 已提交
2812 2813
    # Super cool Python argument library:
    # https://docs.python.org/3/library/argparse.html
2814 2815 2816 2817 2818 2819 2820 2821 2822
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
2823

2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844
    # parser.add_argument('-a', '--auto-start-service', action='store_true',                        
    #                     help='Automatically start/stop the TDengine service (default: false)')
    # parser.add_argument('-c', '--connector-type', action='store', default='native', type=str,
    #                     help='Connector type to use: native, rest, or mixed (default: 10)')
    # parser.add_argument('-d', '--debug', action='store_true',                        
    #                     help='Turn on DEBUG mode for more logging (default: false)')
    # parser.add_argument('-e', '--run-tdengine', action='store_true',                        
    #                     help='Run TDengine service in foreground (default: false)')
    # parser.add_argument('-l', '--larger-data', action='store_true',                        
    #                     help='Write larger amount of data during write operations (default: false)')
    # parser.add_argument('-p', '--per-thread-db-connection', action='store_true',                        
    #                     help='Use a single shared db connection (default: false)')
    # parser.add_argument('-r', '--record-ops', action='store_true',                        
    #                     help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')                    
    # parser.add_argument('-s', '--max-steps', action='store', default=1000, type=int,
    #                     help='Maximum number of steps to run (default: 100)')
    # parser.add_argument('-t', '--num-threads', action='store', default=5, type=int,
    #                     help='Number of threads to run (default: 10)')
    # parser.add_argument('-x', '--continue-on-exception', action='store_true',                        
    #                     help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')                        

S
Shuduo Sang 已提交
2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895
    parser.add_argument(
        '-a',
        '--auto-start-service',
        action='store_true',
        help='Automatically start/stop the TDengine service (default: false)')
    parser.add_argument(
        '-c',
        '--connector-type',
        action='store',
        default='native',
        type=str,
        help='Connector type to use: native, rest, or mixed (default: 10)')
    parser.add_argument(
        '-d',
        '--debug',
        action='store_true',
        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument(
        '-e',
        '--run-tdengine',
        action='store_true',
        help='Run TDengine service in foreground (default: false)')
    parser.add_argument(
        '-l',
        '--larger-data',
        action='store_true',
        help='Write larger amount of data during write operations (default: false)')
    parser.add_argument(
        '-p',
        '--per-thread-db-connection',
        action='store_true',
        help='Use a single shared db connection (default: false)')
    parser.add_argument(
        '-r',
        '--record-ops',
        action='store_true',
        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
    parser.add_argument(
        '-s',
        '--max-steps',
        action='store',
        default=1000,
        type=int,
        help='Maximum number of steps to run (default: 100)')
    parser.add_argument(
        '-t',
        '--num-threads',
        action='store',
        default=5,
        type=int,
        help='Number of threads to run (default: 10)')
2896 2897 2898 2899 2900
    parser.add_argument(
        '-x',
        '--continue-on-exception',
        action='store_true',
        help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')
2901

2902
    global gConfig
2903
    gConfig = parser.parse_args()
2904

2905
    # Logging Stuff
2906
    global logger
S
Shuduo Sang 已提交
2907 2908
    _logger = logging.getLogger('CrashGen')  # real logger
    _logger.addFilter(LoggingFilter())
2909 2910 2911
    ch = logging.StreamHandler()
    _logger.addHandler(ch)

S
Shuduo Sang 已提交
2912 2913
    # Logging adapter, to be used as a logger
    logger = MyLoggingAdapter(_logger, [])
2914

S
Shuduo Sang 已提交
2915 2916
    if (gConfig.debug):
        logger.setLevel(logging.DEBUG)  # default seems to be INFO
S
Steven Li 已提交
2917 2918
    else:
        logger.setLevel(logging.INFO)
S
Shuduo Sang 已提交
2919

2920 2921
    Dice.seed(0)  # initial seeding of dice

2922
    # Run server or client
2923
    mExec = MainExec()
S
Shuduo Sang 已提交
2924
    if gConfig.run_tdengine:  # run server
2925
        mExec.runService()
S
Shuduo Sang 已提交
2926
    else:
2927
        return mExec.runClient()
2928

S
Shuduo Sang 已提交
2929

2930
if __name__ == "__main__":
S
Steven Li 已提交
2931 2932 2933
    exitCode = main()
    # print("Exiting with code: {}".format(exitCode))
    sys.exit(exitCode)