crash_gen.py 107.5 KB
Newer Older
S
Shuduo Sang 已提交
1
# -----!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
S
Shuduo Sang 已提交
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
# For type hinting before definition, ref:
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations
import taos
import crash_gen
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.log import *
from queue import Queue, Empty
from typing import IO
from typing import Set
from typing import Dict
from typing import List
from requests.auth import HTTPBasicAuth
import textwrap
import datetime
import logging
import time
import random
import threading
import requests
import copy
import argparse
import getopt
39

S
Steven Li 已提交
40
import sys
41
import os
42 43
import io
import signal
44
import traceback
45 46 47 48 49 50 51

try:
    import psutil
except:
    print("Psutil module needed, please install: sudo pip3 install psutil")
    sys.exit(-1)

52 53 54 55
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
56

S
Shuduo Sang 已提交
57
# Global variables, tried to keep a small number.
58 59 60

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
S
Shuduo Sang 已提交
61
gConfig = argparse.Namespace()  # Dummy value, will be replaced later
62
gSvcMgr = None # TODO: refactor this hack, use dep injection
63
logger = None
S
Steven Li 已提交
64

S
Shuduo Sang 已提交
65
def runThread(wt: WorkerThread):
66
    wt.run()
67

68 69
class CrashGenError(Exception):
    def __init__(self, msg=None, errno=None):
S
Shuduo Sang 已提交
70
        self.msg = msg
71
        self.errno = errno
S
Shuduo Sang 已提交
72

73 74 75
    def __str__(self):
        return self.msg

S
Shuduo Sang 已提交
76

S
Steven Li 已提交
77
class WorkerThread:
78
    def __init__(self, pool: ThreadPool, tid, tc: ThreadCoordinator,
S
Shuduo Sang 已提交
79 80 81
                 # te: TaskExecutor,
                 ):  # note: main thread context!
        # self._curStep = -1
82
        self._pool = pool
S
Shuduo Sang 已提交
83 84
        self._tid = tid
        self._tc = tc  # type: ThreadCoordinator
S
Steven Li 已提交
85
        # self.threadIdent = threading.get_ident()
86 87
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()
S
Steven Li 已提交
88

89
        # Let us have a DB connection of our own
S
Shuduo Sang 已提交
90
        if (gConfig.per_thread_db_connection):  # type: ignore
91
            # print("connector_type = {}".format(gConfig.connector_type))
92 93 94 95 96 97 98 99 100 101 102
            if gConfig.connector_type == 'native':
                self._dbConn = DbConn.createNative() 
            elif gConfig.connector_type == 'rest':
                self._dbConn = DbConn.createRest() 
            elif gConfig.connector_type == 'mixed':
                if Dice.throw(2) == 0: # 1/2 chance
                    self._dbConn = DbConn.createNative() 
                else:
                    self._dbConn = DbConn.createRest() 
            else:
                raise RuntimeError("Unexpected connector type: {}".format(gConfig.connector_type))
103

S
Shuduo Sang 已提交
104
        self._dbInUse = False  # if "use db" was executed already
105

106
    def logDebug(self, msg):
S
Steven Li 已提交
107
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
108 109

    def logInfo(self, msg):
S
Steven Li 已提交
110
        logger.info("    TRD[{}] {}".format(self._tid, msg))
111

112 113 114 115
    def dbInUse(self):
        return self._dbInUse

    def useDb(self):
S
Shuduo Sang 已提交
116
        if (not self._dbInUse):
117 118 119
            self.execSql("use db")
        self._dbInUse = True

120
    def getTaskExecutor(self):
S
Shuduo Sang 已提交
121
        return self._tc.getTaskExecutor()
122

S
Steven Li 已提交
123
    def start(self):
124
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
125

S
Shuduo Sang 已提交
126
    def run(self):
S
Steven Li 已提交
127
        # initialization after thread starts, in the thread context
128
        # self.isSleeping = False
129 130
        logger.info("Starting to run thread: {}".format(self._tid))

S
Shuduo Sang 已提交
131
        if (gConfig.per_thread_db_connection):  # type: ignore
132
            logger.debug("Worker thread openning database connection")
133
            self._dbConn.open()
S
Steven Li 已提交
134

S
Shuduo Sang 已提交
135 136
        self._doTaskLoop()

137
        # clean up
S
Shuduo Sang 已提交
138
        if (gConfig.per_thread_db_connection):  # type: ignore
139 140 141 142
            if self._dbConn.isOpen: #sometimes it is not open
                self._dbConn.close()
            else:
                logger.warning("Cleaning up worker thread, dbConn already closed")
143

S
Shuduo Sang 已提交
144
    def _doTaskLoop(self):
145 146
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
S
Shuduo Sang 已提交
147 148
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
149 150 151
            try:
                tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
            except threading.BrokenBarrierError as err: # main thread timed out
152
                print("_bto", end="")
153 154 155
                logger.debug("[TRD] Worker thread exiting due to main thread barrier time-out")
                break

156
            logger.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid))
157
            self.crossStepGate()   # then per-thread gate, after being tapped
158
            logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
159
            if not self._tc.isRunning():
160
                print("_wts", end="")
161
                logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
162 163
                break

164
            # Fetch a task from the Thread Coordinator
165
            logger.debug( "[TRD] Worker thread [{}] about to fetch task".format(self._tid))
166
            task = tc.fetchTask()
167 168

            # Execute such a task
S
Shuduo Sang 已提交
169 170 171
            logger.debug(
                "[TRD] Worker thread [{}] about to execute task: {}".format(
                    self._tid, task.__class__.__name__))
172
            task.execute(self)
173
            tc.saveExecutedTask(task)
174
            logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))
S
Shuduo Sang 已提交
175 176

            self._dbInUse = False  # there may be changes between steps
177
        # print("_wtd", end=None) # worker thread died
178

S
Shuduo Sang 已提交
179 180
    def verifyThreadSelf(self):  # ensure we are called by this own thread
        if (threading.get_ident() != self._thread.ident):
S
Steven Li 已提交
181 182
            raise RuntimeError("Unexpectly called from other threads")

S
Shuduo Sang 已提交
183 184
    def verifyThreadMain(self):  # ensure we are called by the main thread
        if (threading.get_ident() != threading.main_thread().ident):
S
Steven Li 已提交
185 186 187
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
S
Shuduo Sang 已提交
188
        if (not self._thread.is_alive()):
S
Steven Li 已提交
189 190
            raise RuntimeError("Unexpected dead thread")

191
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
192 193
    def crossStepGate(self):
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
194 195
        self.verifyThreadSelf()  # only allowed by ourselves

196
        # Wait again at the "gate", waiting to be "tapped"
S
Shuduo Sang 已提交
197 198 199 200
        logger.debug(
            "[TRD] Worker thread {} about to cross the step gate".format(
                self._tid))
        self._stepGate.wait()
201
        self._stepGate.clear()
S
Shuduo Sang 已提交
202

203
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
204

S
Shuduo Sang 已提交
205
    def tapStepGate(self):  # give it a tap, release the thread waiting there
206
        # self.verifyThreadAlive()
S
Shuduo Sang 已提交
207 208
        self.verifyThreadMain()  # only allowed for main thread

209 210 211 212 213 214
        if self._thread.is_alive():
            logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
            self._stepGate.set()  # wake up!
            time.sleep(0)  # let the released thread run a bit
        else:
            print("_tad", end="") # Thread already dead
215

S
Shuduo Sang 已提交
216
    def execSql(self, sql):  # TODO: expose DbConn directly
217
        return self.getDbConn().execute(sql)
218

S
Shuduo Sang 已提交
219
    def querySql(self, sql):  # TODO: expose DbConn directly
220
        return self.getDbConn().query(sql)
221 222

    def getQueryResult(self):
223
        return self.getDbConn().getQueryResult()
224

225
    def getDbConn(self):
S
Shuduo Sang 已提交
226 227
        if (gConfig.per_thread_db_connection):
            return self._dbConn
228
        else:
229
            return self._tc.getDbManager().getDbConn()
230

231 232
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
S
Shuduo Sang 已提交
233
    #         return self._dbConn.query(sql)
234 235
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
236

237
# The coordinator of all worker threads, mostly running in main thread
S
Shuduo Sang 已提交
238 239


240
class ThreadCoordinator:
241 242
    WORKER_THREAD_TIMEOUT = 30

243
    def __init__(self, pool: ThreadPool, dbManager):
S
Shuduo Sang 已提交
244
        self._curStep = -1  # first step is 0
245
        self._pool = pool
246
        # self._wd = wd
S
Shuduo Sang 已提交
247
        self._te = None  # prepare for every new step
248
        self._dbManager = dbManager
S
Shuduo Sang 已提交
249 250
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things
S
Steven Li 已提交
251

S
Shuduo Sang 已提交
252 253
        self._stepBarrier = threading.Barrier(
            self._pool.numThreads + 1)  # one barrier for all threads
254
        self._execStats = ExecutionStats()
255
        self._runStatus = MainExec.STATUS_RUNNING
S
Steven Li 已提交
256

257 258 259
    def getTaskExecutor(self):
        return self._te

S
Shuduo Sang 已提交
260
    def getDbManager(self) -> DbManager:
261
        return self._dbManager
262

263 264
    def crossStepBarrier(self, timeout=None):
        self._stepBarrier.wait(timeout) 
265

266 267 268 269
    def requestToStop(self):
        self._runStatus = MainExec.STATUS_STOPPING
        self._execStats.registerFailure("User Interruption")

270
    def _runShouldEnd(self, transitionFailed, hasAbortedTask, workerTimeout):
271 272 273 274 275 276 277 278 279
        maxSteps = gConfig.max_steps  # type: ignore
        if self._curStep >= (maxSteps - 1): # maxStep==10, last curStep should be 9
            return True
        if self._runStatus != MainExec.STATUS_RUNNING:
            return True
        if transitionFailed:
            return True
        if hasAbortedTask:
            return True
280 281
        if workerTimeout:
            return True
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
        return False

    def _hasAbortedTask(self): # from execution of previous step
        for task in self._executedTasks:
            if task.isAborted():
                # print("Task aborted: {}".format(task))
                # hasAbortedTask = True
                return True
        return False

    def _releaseAllWorkerThreads(self, transitionFailed):
        self._curStep += 1  # we are about to get into next step. TODO: race condition here!
        # Now not all threads had time to go to sleep
        logger.debug(
            "--\r\n\n--> Step {} starts with main thread waking up".format(self._curStep))

        # A new TE for the new step
        self._te = None # set to empty first, to signal worker thread to stop
        if not transitionFailed:  # only if not failed
            self._te = TaskExecutor(self._curStep)

        logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(
                self._curStep))  # Now not all threads had time to go to sleep
        # Worker threads will wake up at this point, and each execute it's own task
        self.tapAllThreads() # release all worker thread from their "gate"

    def _syncAtBarrier(self):
         # Now main thread (that's us) is ready to enter a step
        # let other threads go past the pool barrier, but wait at the
        # thread gate
        logger.debug("[TRD] Main thread about to cross the barrier")
313
        self.crossStepBarrier(timeout=self.WORKER_THREAD_TIMEOUT)
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
        self._stepBarrier.reset()  # Other worker threads should now be at the "gate"
        logger.debug("[TRD] Main thread finished crossing the barrier")

    def _doTransition(self):
        transitionFailed = False
        try:
            sm = self._dbManager.getStateMachine()
            logger.debug("[STT] starting transitions")
            # at end of step, transiton the DB state
            sm.transition(self._executedTasks)
            logger.debug("[STT] transition ended")
            # Due to limitation (or maybe not) of the Python library,
            # we cannot share connections across threads
            if sm.hasDatabase():
                for t in self._pool.threadList:
                    logger.debug("[DB] use db for all worker threads")
                    t.useDb()
                    # t.execSql("use db") # main thread executing "use
                    # db" on behalf of every worker thread
        except taos.error.ProgrammingError as err:
            if (err.msg == 'network unavailable'):  # broken DB connection
                logger.info("DB connection broken, execution failed")
                traceback.print_stack()
                transitionFailed = True
                self._te = None  # Not running any more
                self._execStats.registerFailure("Broken DB Connection")
                # continue # don't do that, need to tap all threads at
                # end, and maybe signal them to stop
            else:
                raise
344
        return transitionFailed
345 346 347 348 349 350

        self.resetExecutedTasks()  # clear the tasks after we are done
        # Get ready for next step
        logger.debug("<-- Step {} finished, trasition failed = {}".format(self._curStep, transitionFailed))
        return transitionFailed

S
Shuduo Sang 已提交
351
    def run(self):
352
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
353 354

        # Coordinate all threads step by step
S
Shuduo Sang 已提交
355
        self._curStep = -1  # not started yet
356
        
S
Shuduo Sang 已提交
357
        self._execStats.startExec()  # start the stop watch
358 359
        transitionFailed = False
        hasAbortedTask = False
360 361
        workerTimeout = False
        while not self._runShouldEnd(transitionFailed, hasAbortedTask, workerTimeout):
362
            if not gConfig.debug: # print this only if we are not in debug mode                
S
Shuduo Sang 已提交
363
                print(".", end="", flush=True)
364
                        
365 366 367 368 369 370 371 372 373 374
            try:
                self._syncAtBarrier() # For now just cross the barrier
            except threading.BrokenBarrierError as err:
                logger.info("Main loop aborted, caused by worker thread time-out")
                self._execStats.registerFailure("Aborted due to worker thread timeout")
                print("\n\nWorker Thread time-out detected, important thread info:")
                ts = ThreadStacks()
                ts.print(filterInternal=True)
                workerTimeout = True
                break
375 376

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
S
Shuduo Sang 已提交
377 378
            # We use this period to do house keeping work, when all worker
            # threads are QUIET.
379 380 381
            hasAbortedTask = self._hasAbortedTask() # from previous step
            if hasAbortedTask: 
                logger.info("Aborted task encountered, exiting test program")
382
                self._execStats.registerFailure("Aborted Task Encountered")
383
                break # do transition only if tasks are error free
S
Shuduo Sang 已提交
384

385
            # Ending previous step
386 387 388 389 390 391 392
            try:
                transitionFailed = self._doTransition() # To start, we end step -1 first
            except taos.error.ProgrammingError as err:
                transitionFailed = True
                errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno  # correct error scheme
                logger.info("Transition failed: errno=0x{:X}, msg: {}".format(errno2, err))

393 394
            # Then we move on to the next step
            self._releaseAllWorkerThreads(transitionFailed)                    
395

396 397
        if hasAbortedTask or transitionFailed : # abnormal ending, workers waiting at "gate"
            logger.debug("Abnormal ending of main thraed")
398 399
        elif workerTimeout:
            logger.debug("Abnormal ending of main thread, due to worker timeout")
400 401 402
        else: # regular ending, workers waiting at "barrier"
            logger.debug("Regular ending, main thread waiting for all worker threads to stop...")
            self._syncAtBarrier()
403

404 405 406
        self._te = None  # No more executor, time to end
        logger.debug("Main thread tapping all threads one last time...")
        self.tapAllThreads()  # Let the threads run one last time
407

408
        logger.debug("\r\n\n--> Main thread ready to finish up...")
409
        logger.debug("Main thread joining all threads")
S
Shuduo Sang 已提交
410
        self._pool.joinAll()  # Get all threads to finish
411
        logger.info("\nAll worker threads finished")
412 413
        self._execStats.endExec()

414 415
    def printStats(self):
        self._execStats.printStats()
S
Steven Li 已提交
416

S
Steven Li 已提交
417 418 419 420 421 422
    def isFailed(self):
        return self._execStats.isFailed()

    def getExecStats(self):
        return self._execStats

S
Shuduo Sang 已提交
423
    def tapAllThreads(self):  # in a deterministic manner
S
Steven Li 已提交
424
        wakeSeq = []
S
Shuduo Sang 已提交
425 426
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
S
Steven Li 已提交
427 428 429
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
S
Shuduo Sang 已提交
430 431 432
        logger.debug(
            "[TRD] Main thread waking up worker threads: {}".format(
                str(wakeSeq)))
433
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
434
        for i in wakeSeq:
S
Shuduo Sang 已提交
435 436 437
            # TODO: maybe a bit too deep?!
            self._pool.threadList[i].tapStepGate()
            time.sleep(0)  # yield
S
Steven Li 已提交
438

439
    def isRunning(self):
S
Shuduo Sang 已提交
440
        return self._te is not None
441

S
Shuduo Sang 已提交
442 443
    def fetchTask(self) -> Task:
        if (not self.isRunning()):  # no task
444
            raise RuntimeError("Cannot fetch task when not running")
445 446
        # return self._wd.pickTask()
        # Alternatively, let's ask the DbState for the appropriate task
447 448 449 450 451 452 453
        # dbState = self.getDbState()
        # tasks = dbState.getTasksAtState() # TODO: create every time?
        # nTasks = len(tasks)
        # i = Dice.throw(nTasks)
        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
        # return tasks[i].clone() # TODO: still necessary?
S
Shuduo Sang 已提交
454 455 456 457 458
        # pick a task type for current state
        taskType = self.getDbManager().getStateMachine().pickTaskType()
        return taskType(
            self.getDbManager(),
            self._execStats)  # create a task from it
459 460

    def resetExecutedTasks(self):
S
Shuduo Sang 已提交
461
        self._executedTasks = []  # should be under single thread
462 463 464 465

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
466 467

# We define a class to run a number of threads in locking steps.
S
Shuduo Sang 已提交
468 469


470
class ThreadPool:
471
    def __init__(self, numThreads, maxSteps):
472 473 474 475
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
S
Shuduo Sang 已提交
476 477
        self.threadList = []  # type: List[WorkerThread]

478
    # starting to run all the threads, in locking steps
479
    def createAndStartThreads(self, tc: ThreadCoordinator):
S
Shuduo Sang 已提交
480 481
        for tid in range(0, self.numThreads):  # Create the threads
            workerThread = WorkerThread(self, tid, tc)
482
            self.threadList.append(workerThread)
S
Shuduo Sang 已提交
483
            workerThread.start()  # start, but should block immediately before step 0
484 485 486 487 488 489

    def joinAll(self):
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

490 491
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Shuduo Sang 已提交
492 493


S
Steven Li 已提交
494 495
class LinearQueue():
    def __init__(self):
496
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
497
        self.lastIndex = 0
S
Shuduo Sang 已提交
498 499
        self._lock = threading.RLock()  # our functions may call each other
        self.inUse = set()  # the indexes that are in use right now
S
Steven Li 已提交
500

501
    def toText(self):
S
Shuduo Sang 已提交
502 503
        return "[{}..{}], in use: {}".format(
            self.firstIndex, self.lastIndex, self.inUse)
504 505

    # Push (add new element, largest) to the tail, and mark it in use
S
Shuduo Sang 已提交
506
    def push(self):
507
        with self._lock:
S
Shuduo Sang 已提交
508 509
            # if ( self.isEmpty() ):
            #     self.lastIndex = self.firstIndex
510
            #     return self.firstIndex
511 512
            # Otherwise we have something
            self.lastIndex += 1
513 514
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
515
            return self.lastIndex
S
Steven Li 已提交
516 517

    def pop(self):
518
        with self._lock:
S
Shuduo Sang 已提交
519 520 521 522
            if (self.isEmpty()):
                # raise RuntimeError("Cannot pop an empty queue")
                return False  # TODO: None?

523
            index = self.firstIndex
S
Shuduo Sang 已提交
524
            if (index in self.inUse):
525 526
                return False

527 528 529 530 531 532 533
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
534
        with self._lock:
535 536 537 538
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
539
    def allocate(self, i):
540
        with self._lock:
541
            # logger.debug("LQ allocating item {}".format(i))
S
Shuduo Sang 已提交
542 543 544
            if (i in self.inUse):
                raise RuntimeError(
                    "Cannot re-use same index in queue: {}".format(i))
545 546
            self.inUse.add(i)

S
Steven Li 已提交
547
    def release(self, i):
548
        with self._lock:
549
            # logger.debug("LQ releasing item {}".format(i))
S
Shuduo Sang 已提交
550
            self.inUse.remove(i)  # KeyError possible, TODO: why?
551 552 553 554

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
555
    def pickAndAllocate(self):
S
Shuduo Sang 已提交
556
        if (self.isEmpty()):
557 558
            return None
        with self._lock:
S
Shuduo Sang 已提交
559
            cnt = 0  # counting the interations
560 561
            while True:
                cnt += 1
S
Shuduo Sang 已提交
562
                if (cnt > self.size() * 10):  # 10x iteration already
563 564
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
S
Shuduo Sang 已提交
565 566
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if (ret not in self.inUse):
567 568 569
                    self.allocate(ret)
                    return ret

S
Shuduo Sang 已提交
570

571
class DbConn:
572
    TYPE_NATIVE = "native-c"
573
    TYPE_REST =   "rest-api"
574 575 576 577 578 579 580 581 582
    TYPE_INVALID = "invalid"

    @classmethod
    def create(cls, connType):
        if connType == cls.TYPE_NATIVE:
            return DbConnNative()
        elif connType == cls.TYPE_REST:
            return DbConnRest()
        else:
S
Shuduo Sang 已提交
583 584
            raise RuntimeError(
                "Unexpected connection type: {}".format(connType))
585 586 587 588 589 590 591 592 593

    @classmethod
    def createNative(cls):
        return cls.create(cls.TYPE_NATIVE)

    @classmethod
    def createRest(cls):
        return cls.create(cls.TYPE_REST)

594 595
    def __init__(self):
        self.isOpen = False
596
        self._type = self.TYPE_INVALID
597 598 599 600
        self._lastSql = None

    def getLastSql(self):
        return self._lastSql
601 602

    def open(self):
S
Shuduo Sang 已提交
603
        if (self.isOpen):
604 605
            raise RuntimeError("Cannot re-open an existing DB connection")

606 607
        # below implemented by child classes
        self.openByType()
608

609
        logger.debug("[DB] data connection opened, type = {}".format(self._type))
610 611
        self.isOpen = True

S
Shuduo Sang 已提交
612 613 614 615
    def resetDb(self):  # reset the whole database, etc.
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot reset database until connection is open")
616 617
        # self._tdSql.prepare() # Recreate database, etc.

618
        self.execute('drop database if exists db')
619 620
        logger.debug("Resetting DB, dropped database")
        # self._cursor.execute('create database db')
621
        # self._cursor.execute('use db')
622 623
        # tdSql.execute('show databases')

S
Shuduo Sang 已提交
624
    def queryScalar(self, sql) -> int:
625 626
        return self._queryAny(sql)

S
Shuduo Sang 已提交
627
    def queryString(self, sql) -> str:
628 629
        return self._queryAny(sql)

S
Shuduo Sang 已提交
630 631
    def _queryAny(self, sql):  # actual query result as an int
        if (not self.isOpen):
632
            raise RuntimeError("Cannot query database until connection is open")
633
        nRows = self.query(sql)
S
Shuduo Sang 已提交
634
        if nRows != 1:
635
            raise RuntimeError("Unexpected result for query: {}, rows = {}".format(sql, nRows))
636
        if self.getResultRows() != 1 or self.getResultCols() != 1:
637
            raise RuntimeError("Unexpected result set for query: {}".format(sql))
638 639
        return self.getQueryResult()[0][0]

640 641 642 643 644 645 646 647 648
    def use(self, dbName):
        self.execute("use {}".format(dbName))

    def hasDatabases(self):
        return self.query("show databases") > 0

    def hasTables(self):
        return self.query("show tables") > 0

649 650
    def execute(self, sql):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
651

652 653 654
    def query(self, sql) -> int: # return num rows returned
        raise RuntimeError("Unexpected execution, should be overriden")

655 656
    def openByType(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
657

658 659
    def getQueryResult(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
660

661 662
    def getResultRows(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
663

664 665 666 667
    def getResultCols(self):
        raise RuntimeError("Unexpected execution, should be overriden")

# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql
S
Shuduo Sang 已提交
668 669


670 671 672 673
class DbConnRest(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
674
        self._url = "http://localhost:6020/rest/sql"  # fixed for now
675 676
        self._result = None

S
Shuduo Sang 已提交
677 678 679
    def openByType(self):  # Open connection
        pass  # do nothing, always open

680
    def close(self):
S
Shuduo Sang 已提交
681 682 683
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
684 685 686 687 688
        # Do nothing for REST
        logger.debug("[DB] REST Database connection closed")
        self.isOpen = False

    def _doSql(self, sql):
689
        self._lastSql = sql # remember this, last SQL attempted
690 691 692
        try:
            r = requests.post(self._url, 
                data = sql,
693
                auth = HTTPBasicAuth('root', 'taosdata'))         
694 695 696
        except:
            print("REST API Failure (TODO: more info here)")
            raise
697 698
        rj = r.json()
        # Sanity check for the "Json Result"
S
Shuduo Sang 已提交
699
        if ('status' not in rj):
700 701
            raise RuntimeError("No status in REST response")

S
Shuduo Sang 已提交
702 703 704 705
        if rj['status'] == 'error':  # clearly reported error
            if ('code' not in rj):  # error without code
                raise RuntimeError("REST error return without code")
            errno = rj['code']  # May need to massage this in the future
706
            # print("Raising programming error with REST return: {}".format(rj))
S
Shuduo Sang 已提交
707 708
            raise taos.error.ProgrammingError(
                rj['desc'], errno)  # todo: check existance of 'desc'
709

S
Shuduo Sang 已提交
710 711 712 713
        if rj['status'] != 'succ':  # better be this
            raise RuntimeError(
                "Unexpected REST return status: {}".format(
                    rj['status']))
714 715

        nRows = rj['rows'] if ('rows' in rj) else 0
S
Shuduo Sang 已提交
716
        self._result = rj
717 718
        return nRows

S
Shuduo Sang 已提交
719 720 721 722
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
723 724
        logger.debug("[SQL-REST] Executing SQL: {}".format(sql))
        nRows = self._doSql(sql)
S
Shuduo Sang 已提交
725 726
        logger.debug(
            "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
727 728
        return nRows

S
Shuduo Sang 已提交
729
    def query(self, sql):  # return rows affected
730 731 732 733 734 735 736 737 738 739 740 741 742
        return self.execute(sql)

    def getQueryResult(self):
        return self._result['data']

    def getResultRows(self):
        print(self._result)
        raise RuntimeError("TBD")
        # return self._tdSql.queryRows

    def getResultCols(self):
        print(self._result)
        raise RuntimeError("TBD")
S
Shuduo Sang 已提交
743

744
    # Duplicate code from TDMySQL, TODO: merge all this into DbConnNative
745 746


747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
class MyTDSql:
    def __init__(self):
        self.queryRows = 0
        self.queryCols = 0
        self.affectedRows = 0

    def init(self, cursor, log=True):
        self.cursor = cursor
        # if (log):
        #     caller = inspect.getframeinfo(inspect.stack()[1][0])
        #     self.cursor.log(caller.filename + ".sql")

    def close(self):
        self.cursor.close()

    def query(self, sql):
        self.sql = sql
        try:
            self.cursor.execute(sql)
            self.queryResult = self.cursor.fetchall()
            self.queryRows = len(self.queryResult)
            self.queryCols = len(self.cursor.description)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.queryRows
775

776 777 778 779 780 781 782 783 784 785 786
    def execute(self, sql):
        self.sql = sql
        try:
            self.affectedRows = self.cursor.execute(sql)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.affectedRows

S
Shuduo Sang 已提交
787

788
class DbConnNative(DbConn):
789 790 791 792
    # Class variables
    _lock = threading.Lock()
    _connInfoDisplayed = False

793 794
    def __init__(self):
        super().__init__()
795
        self._type = self.TYPE_NATIVE
S
Shuduo Sang 已提交
796
        self._conn = None
797
        self._cursor = None
798
        
S
Shuduo Sang 已提交
799

800 801 802 803 804 805 806
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

807
        buildPath = None
808 809 810 811
        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
S
Shuduo Sang 已提交
812
                    buildPath = root[:len(root) - len("/build/bin")]
813
                    break
814 815
        if buildPath == None:
            raise RuntimeError("Failed to determine buildPath, selfPath={}".format(self_path))
816 817
        return buildPath

818
    
S
Shuduo Sang 已提交
819
    def openByType(self):  # Open connection
820
        cfgPath = self.getBuildPath() + "/test/cfg"
821
        hostAddr = "127.0.0.1"
822

823 824 825 826 827
        with self._lock: # force single threading for opening DB connections
            if not self._connInfoDisplayed:
                self.__class__._connInfoDisplayed = True # updating CLASS variable
                logger.info("Initiating TAOS native connection to {}, using config at {}".format(hostAddr, cfgPath))
            
828
            self._conn = taos.connect(host=hostAddr, config=cfgPath)  # TODO: make configurable
829 830
            self._cursor = self._conn.cursor()
        
831
        self._cursor.execute('reset query cache')
S
Shuduo Sang 已提交
832
        # self._cursor.execute('use db') # do this at the beginning of every
833 834

        # Open connection
835
        self._tdSql = MyTDSql()
836
        self._tdSql.init(self._cursor)
S
Shuduo Sang 已提交
837

838
    def close(self):
S
Shuduo Sang 已提交
839 840 841
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
842
        self._tdSql.close()
843
        logger.debug("[DB] Database connection closed")
844
        self.isOpen = False
S
Steven Li 已提交
845

S
Shuduo Sang 已提交
846 847 848 849
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
850 851
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
S
Shuduo Sang 已提交
852 853 854
        logger.debug(
            "[SQL] Execution Result, nRows = {}, SQL = {}".format(
                nRows, sql))
855
        return nRows
S
Steven Li 已提交
856

S
Shuduo Sang 已提交
857 858 859 860
    def query(self, sql):  # return rows affected
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
861 862
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
S
Shuduo Sang 已提交
863 864 865
        logger.debug(
            "[SQL] Query Result, nRows = {}, SQL = {}".format(
                nRows, sql))
866
        return nRows
867
        # results are in: return self._tdSql.queryResult
868

869 870 871
    def getQueryResult(self):
        return self._tdSql.queryResult

872 873
    def getResultRows(self):
        return self._tdSql.queryRows
874

875 876
    def getResultCols(self):
        return self._tdSql.queryCols
877

S
Shuduo Sang 已提交
878

879
class AnyState:
S
Shuduo Sang 已提交
880 881 882
    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, no even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
883
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
S
Shuduo Sang 已提交
884
    STATE_HAS_DATA = 3  # we have some data in the table
885 886 887 888 889
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
890 891
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
892 893 894 895 896 897 898
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Shuduo Sang 已提交
899 900
        # -1 hack to accomodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]
901 902 903 904

    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
905 906 907 908 909 910
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
S
Shuduo Sang 已提交
911 912 913
            raise RuntimeError(
                "Unexpected comparison, type = {}".format(
                    type(other)))
S
Steven Li 已提交
914

915 916 917
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
918 919 920
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

921 922
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
S
Shuduo Sang 已提交
923

924 925
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
S
Shuduo Sang 已提交
926

927 928
    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]
S
Shuduo Sang 已提交
929

930 931
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
932

933 934
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
935

936 937
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
S
Shuduo Sang 已提交
938

939 940 941 942 943
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
S
Shuduo Sang 已提交
944
        for task in tasks:
945 946 947
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
948
                # task.logDebug("Task success found")
949
                sCnt += 1
S
Shuduo Sang 已提交
950 951 952
                if (sCnt >= 2):
                    raise RuntimeError(
                        "Unexpected more than 1 success with task: {}".format(cls))
953 954 955 956

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
S
Shuduo Sang 已提交
957
        for task in tasks:
958 959
            if not isinstance(task, cls):
                continue
S
Shuduo Sang 已提交
960
            exists = True  # we have a valid instance
961 962
            if task.isSuccess():
                sCnt += 1
S
Shuduo Sang 已提交
963 964 965
        if (exists and sCnt <= 0):
            raise RuntimeError(
                "Unexpected zero success for task: {}".format(cls))
966 967

    def assertNoTask(self, tasks, cls):
S
Shuduo Sang 已提交
968
        for task in tasks:
969
            if isinstance(task, cls):
S
Shuduo Sang 已提交
970 971
                raise CrashGenError(
                    "This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))
972 973

    def assertNoSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
974
        for task in tasks:
975 976
            if isinstance(task, cls):
                if task.isSuccess():
S
Shuduo Sang 已提交
977 978
                    raise RuntimeError(
                        "Unexpected successful task: {}".format(cls))
979 980

    def hasSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
981
        for task in tasks:
982 983 984 985 986 987
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
988
    def hasTask(self, tasks, cls):
S
Shuduo Sang 已提交
989
        for task in tasks:
S
Steven Li 已提交
990 991 992 993
            if isinstance(task, cls):
                return True
        return False

S
Shuduo Sang 已提交
994

995 996 997 998
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
S
Shuduo Sang 已提交
999 1000 1001
            False, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
1002 1003 1004 1005
        ]

    # def verifyTasksToState(self, tasks, newState):

S
Shuduo Sang 已提交
1006

1007 1008 1009 1010
class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
S
Shuduo Sang 已提交
1011 1012 1013
            True, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
1014 1015
        ]

S
Shuduo Sang 已提交
1016 1017
    def verifyTasksToState(self, tasks, newState):
        if (self.hasSuccess(tasks, TaskCreateDb)
1018
                ):  # at EMPTY, if there's succes in creating DB
S
Shuduo Sang 已提交
1019 1020 1021 1022
            if (not self.hasTask(tasks, TaskDropDb)):  # and no drop_db tasks
                # we must have at most one. TODO: compare numbers
                self.assertAtMostOneSuccess(tasks, TaskCreateDb)

1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1034 1035 1036
        if (not self.hasTask(tasks, TaskCreateDb)):
            # only if we don't create any more
            self.assertAtMostOneSuccess(tasks, TaskDropDb)
1037 1038 1039 1040 1041

        # TODO: restore the below, the problem exists, although unlikely in real-world
        # if (gSvcMgr!=None) and gSvcMgr.isRestarting():     
        # if (gSvcMgr == None) or (not gSvcMgr.isRestarting()) : 
        #     self.assertIfExistThenSuccess(tasks, TaskDropDb)       
1042

S
Shuduo Sang 已提交
1043

1044
class StateSuperTableOnly(AnyState):
1045 1046 1047 1048 1049 1050 1051 1052 1053
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1054
        if (self.hasSuccess(tasks, TaskDropSuperTable)
1055
                ):  # we are able to drop the table
1056
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
1057 1058
            # we must have had recreted it
            self.hasSuccess(tasks, TaskCreateSuperTable)
1059

1060
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
1061 1062
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
1063
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
1064 1065 1066
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
1067
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
1068 1069 1070
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
1071

S
Shuduo Sang 已提交
1072

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1083
        if (newState.equals(AnyState.STATE_EMPTY)):
1084
            self.hasSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1085 1086 1087 1088
            if (not self.hasTask(tasks, TaskCreateDb)):
                self.assertAtMostOneSuccess(tasks, TaskDropDb)  # TODO: dicy
        elif (newState.equals(AnyState.STATE_DB_ONLY)):  # in DB only
            if (not self.hasTask(tasks, TaskCreateDb)
1089
                ):  # without a create_db task
S
Shuduo Sang 已提交
1090 1091
                # we must have drop_db task
                self.assertNoTask(tasks, TaskDropDb)
1092
            self.hasSuccess(tasks, TaskDropSuperTable)
1093
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
1094 1095 1096 1097
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
1098
            # self.hasSuccess(tasks, DeleteDataTasks)
S
Shuduo Sang 已提交
1099 1100
        else:  # should be STATE_HAS_DATA
            if (not self.hasTask(tasks, TaskCreateDb)
1101
                ):  # only if we didn't create one
S
Shuduo Sang 已提交
1102 1103 1104
                # we shouldn't have dropped it
                self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)
1105
                    ):  # if we didn't create the table
S
Shuduo Sang 已提交
1106 1107
                # we should not have a task that drops it
                self.assertNoTask(tasks, TaskDropSuperTable)
1108
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
1109

S
Shuduo Sang 已提交
1110

1111
class StateMechine:
1112 1113
    def __init__(self, dbConn):
        self._dbConn = dbConn
S
Shuduo Sang 已提交
1114 1115 1116
        self._curState = self._findCurrentState()  # starting state
        # transitition target probabilities, indexed with value of STATE_EMPTY,
        # STATE_DB_ONLY, etc.
1117
        self._stateWeights = [1, 2, 10, 40]
S
Shuduo Sang 已提交
1118

1119 1120 1121
    def getCurrentState(self):
        return self._curState

1122 1123 1124
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

1125
    # May be slow, use cautionsly...
S
Shuduo Sang 已提交
1126
    def getTaskTypes(self):  # those that can run (directly/indirectly) from the current state
1127 1128 1129 1130 1131 1132
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

S
Shuduo Sang 已提交
1133
        allTaskClasses = StateTransitionTask.__subclasses__()  # all state transition tasks
1134 1135
        firstTaskTypes = []
        for tc in allTaskClasses:
S
Shuduo Sang 已提交
1136
            # t = tc(self) # create task object
1137 1138
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
S
Shuduo Sang 已提交
1139 1140 1141 1142 1143 1144 1145 1146
        # now we have all the tasks that can begin directly from the current
        # state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy()  # have to have these
        for task1 in firstTaskTypes:  # each task type gathered so far
            endState = task1.getEndState()  # figure the end state
            if endState is None:  # does not change end state
                continue  # no use, do nothing
            for tc in allTaskClasses:  # what task can further begin from there?
1147
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
S
Shuduo Sang 已提交
1148
                    taskTypes.append(tc)  # gather it
1149 1150

        if len(taskTypes) <= 0:
S
Shuduo Sang 已提交
1151 1152 1153 1154 1155 1156 1157
            raise RuntimeError(
                "No suitable task types found for state: {}".format(
                    self._curState))
        logger.debug(
            "[OPS] Tasks found for state {}: {}".format(
                self._curState,
                typesToStrings(taskTypes)))
1158 1159 1160 1161
        return taskTypes

    def _findCurrentState(self):
        dbc = self._dbConn
S
Shuduo Sang 已提交
1162
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
1163 1164
        if not dbc.hasDatabases():  # no database?!
            logger.debug( "[STT] empty database found, between {} and {}".format(ts, time.time()))
1165
            return StateEmpty()
S
Shuduo Sang 已提交
1166 1167
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
1168 1169 1170
        dbc.use("db")
        if not dbc.hasTables():  # no tables
            logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
1171
            return StateDbOnly()
1172 1173 1174 1175

        sTable = DbManager.getFixedSuperTable()
        if sTable.hasRegTables(dbc):  # no regular tables
            logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
1176
            return StateSuperTableOnly()
S
Shuduo Sang 已提交
1177
        else:  # has actual tables
1178
            logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
1179 1180 1181
            return StateHasData()

    def transition(self, tasks):
S
Shuduo Sang 已提交
1182
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
1183
            logger.debug("[STT] Starting State: {}".format(self._curState))
S
Shuduo Sang 已提交
1184
            return  # do nothing
1185

S
Shuduo Sang 已提交
1186 1187
        # this should show up in the server log, separating steps
        self._dbConn.execute("show dnodes")
1188 1189 1190 1191

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
1192 1193
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
            # case of multiple creation and drops
1194 1195

        if self._curState.canDropDb():
1196
            if gSvcMgr == None: # only if we are running as client-only
S
Steven Li 已提交
1197
                self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1198 1199
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
            # case of drop-create-drop
1200 1201 1202

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
S
Shuduo Sang 已提交
1203 1204
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not
            # really, in case of create-drop-create
1205 1206 1207

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
S
Shuduo Sang 已提交
1208 1209
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not
            # really in case of drop-create-drop
1210 1211

        # if self._state.canAddData():
S
Shuduo Sang 已提交
1212 1213
        # self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true
        # actually
1214 1215 1216 1217 1218 1219

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
S
Shuduo Sang 已提交
1220 1221
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
1222 1223 1224
        self._curState = newState

    def pickTaskType(self):
S
Shuduo Sang 已提交
1225 1226
        # all the task types we can choose from at curent state
        taskTypes = self.getTaskTypes()
1227 1228 1229
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
S
Shuduo Sang 已提交
1230 1231 1232
            if endState is not None:
                # TODO: change to a method
                weights.append(self._stateWeights[endState.getValIndex()])
1233
            else:
S
Shuduo Sang 已提交
1234 1235
                # read data task, default to 10: TODO: change to a constant
                weights.append(10)
1236
        i = self._weighted_choice_sub(weights)
S
Shuduo Sang 已提交
1237
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
1238 1239
        return taskTypes[i]

S
Shuduo Sang 已提交
1240 1241 1242 1243 1244
    # ref:
    # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
    def _weighted_choice_sub(self, weights):
        # TODO: use our dice to ensure it being determinstic?
        rnd = random.random() * sum(weights)
1245 1246 1247 1248
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
1249

1250
# Manager of the Database Data/Connection
S
Shuduo Sang 已提交
1251 1252 1253 1254


class DbManager():
    def __init__(self, resetDb=True):
S
Steven Li 已提交
1255
        self.tableNumQueue = LinearQueue()
S
Shuduo Sang 已提交
1256 1257 1258
        # datetime.datetime(2019, 1, 1) # initial date time tick
        self._lastTick = self.setupLastTick()
        self._lastInt = 0  # next one is initial integer
1259
        self._lock = threading.RLock()
S
Shuduo Sang 已提交
1260

1261
        # self.openDbServerConnection()
S
Shuduo Sang 已提交
1262 1263
        self._dbConn = DbConn.createNative() if (
            gConfig.connector_type == 'native') else DbConn.createRest()
1264
        try:
S
Shuduo Sang 已提交
1265
            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
1266 1267
        except taos.error.ProgrammingError as err:
            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
S
Shuduo Sang 已提交
1268 1269 1270
            if (err.msg == 'client disconnected'):  # cannot open DB connection
                print(
                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
S
Steven Li 已提交
1271
                sys.exit(2)
1272
            else:
S
Shuduo Sang 已提交
1273 1274
                raise
        except BaseException:
S
Steven Li 已提交
1275
            print("[=] Unexpected exception")
S
Shuduo Sang 已提交
1276 1277 1278 1279
            raise

        if resetDb:
            self._dbConn.resetDb()  # drop and recreate DB
1280

S
Shuduo Sang 已提交
1281 1282
        # Do this after dbConn is in proper shape
        self._stateMachine = StateMechine(self._dbConn)
1283

1284 1285 1286
    def getDbConn(self):
        return self._dbConn

S
Shuduo Sang 已提交
1287
    def getStateMachine(self) -> StateMechine:
1288 1289 1290 1291
        return self._stateMachine

    # def getState(self):
    #     return self._stateMachine.getCurrentState()
1292 1293 1294 1295 1296 1297

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
1298
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
1299
    def setupLastTick(self):
1300
        t1 = datetime.datetime(2020, 6, 1)
1301
        t2 = datetime.datetime.now()
S
Shuduo Sang 已提交
1302 1303 1304 1305
        # maybe a very large number, takes 69 years to exceed Python int range
        elSec = int(t2.timestamp() - t1.timestamp())
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * \
            500  # a number representing seconds within 10 years
1306
        # print("elSec = {}".format(elSec))
S
Shuduo Sang 已提交
1307 1308 1309
        t3 = datetime.datetime(2012, 1, 1)  # default "keep" is 10 years
        t4 = datetime.datetime.fromtimestamp(
            t3.timestamp() + elSec2)  # see explanation above
1310 1311 1312
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

S
Shuduo Sang 已提交
1313
    def pickAndAllocateTable(self):  # pick any table, and "use" it
S
Steven Li 已提交
1314 1315
        return self.tableNumQueue.pickAndAllocate()

1316 1317 1318 1319 1320
    def addTable(self):
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

1321 1322
    @classmethod
    def getFixedSuperTableName(cls):
1323
        return "fs_table"
1324

1325 1326 1327 1328
    @classmethod
    def getFixedSuperTable(cls):
        return TdSuperTable(cls.getFixedSuperTableName())

S
Shuduo Sang 已提交
1329
    def releaseTable(self, i):  # return the table back, so others can use it
S
Steven Li 已提交
1330 1331
        self.tableNumQueue.release(i)

1332
    def getNextTick(self):
S
Shuduo Sang 已提交
1333
        with self._lock:  # prevent duplicate tick
1334 1335
            if Dice.throw(20) == 0:  # 1 in 20 chance
                return self._lastTick + datetime.timedelta(0, -100) # Go back in time 100 seconds
S
Shuduo Sang 已提交
1336 1337 1338
            else:  # regular
                # add one second to it
                self._lastTick += datetime.timedelta(0, 1)
S
Steven Li 已提交
1339
                return self._lastTick
1340 1341

    def getNextInt(self):
1342 1343 1344
        with self._lock:
            self._lastInt += 1
            return self._lastInt
1345 1346

    def getNextBinary(self):
S
Shuduo Sang 已提交
1347 1348
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(
            self.getNextInt())
1349 1350

    def getNextFloat(self):
1351 1352 1353
        ret = 0.9 + self.getNextInt()
        # print("Float obtained: {}".format(ret))
        return ret
S
Shuduo Sang 已提交
1354

S
Steven Li 已提交
1355
    def getTableNameToDelete(self):
S
Shuduo Sang 已提交
1356 1357
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if (not tblNum):  # maybe false
1358
            return False
S
Shuduo Sang 已提交
1359

S
Steven Li 已提交
1360 1361
        return "table_{}".format(tblNum)

1362
    def cleanUp(self):
S
Shuduo Sang 已提交
1363 1364
        self._dbConn.close()

1365

1366
class TaskExecutor():
1367
    class BoundedList:
S
Shuduo Sang 已提交
1368
        def __init__(self, size=10):
1369 1370
            self._size = size
            self._list = []
S
Steven Li 已提交
1371
            self._lock = threading.Lock()
1372

S
Shuduo Sang 已提交
1373
        def add(self, n: int):
S
Steven Li 已提交
1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399
            with self._lock:
                if not self._list:  # empty
                    self._list.append(n)
                    return
                # now we should insert
                nItems = len(self._list)
                insPos = 0
                for i in range(nItems):
                    insPos = i
                    if n <= self._list[i]:  # smaller than this item, time to insert
                        break  # found the insertion point
                    insPos += 1  # insert to the right

                if insPos == 0:  # except for the 1st item, # TODO: elimiate first item as gating item
                    return  # do nothing

                # print("Inserting at postion {}, value: {}".format(insPos, n))
                self._list.insert(insPos, n)  # insert

                newLen = len(self._list)
                if newLen <= self._size:
                    return  # do nothing
                elif newLen == (self._size + 1):
                    del self._list[0]  # remove the first item
                else:
                    raise RuntimeError("Corrupt Bounded List")
1400 1401 1402 1403 1404 1405

        def __str__(self):
            return repr(self._list)

    _boundedList = BoundedList()

1406 1407 1408
    def __init__(self, curStep):
        self._curStep = curStep

1409 1410 1411 1412
    @classmethod
    def getBoundedList(cls):
        return cls._boundedList

1413 1414 1415
    def getCurStep(self):
        return self._curStep

S
Shuduo Sang 已提交
1416
    def execute(self, task: Task, wt: WorkerThread):  # execute a task on a thread
1417
        task.execute(wt)
1418

1419 1420 1421 1422
    def recordDataMark(self, n: int):
        # print("[{}]".format(n), end="", flush=True)
        self._boundedList.add(n)

1423 1424
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
1425

1426 1427
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
1428

S
Shuduo Sang 已提交
1429

S
Steven Li 已提交
1430
class Task():
1431 1432 1433 1434
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Shuduo Sang 已提交
1435
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
S
Steven Li 已提交
1436 1437
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn
1438

S
Shuduo Sang 已提交
1439
    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
1440
        self._dbManager = dbManager
S
Shuduo Sang 已提交
1441
        self._workerThread = None
1442
        self._err = None
1443
        self._aborted = False
1444
        self._curStep = None
S
Shuduo Sang 已提交
1445
        self._numRows = None  # Number of rows affected
1446

S
Shuduo Sang 已提交
1447
        # Assign an incremental task serial number
1448
        self._taskNum = self.allocTaskNum()
S
Steven Li 已提交
1449
        # logger.debug("Creating new task {}...".format(self._taskNum))
1450

1451 1452
        self._execStats = execStats

1453
    def isSuccess(self):
S
Shuduo Sang 已提交
1454
        return self._err is None
1455

1456 1457 1458
    def isAborted(self):
        return self._aborted

S
Shuduo Sang 已提交
1459
    def clone(self):  # TODO: why do we need this again?
1460
        newTask = self.__class__(self._dbManager, self._execStats)
1461 1462 1463
        return newTask

    def logDebug(self, msg):
S
Shuduo Sang 已提交
1464 1465 1466
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1467 1468

    def logInfo(self, msg):
S
Shuduo Sang 已提交
1469 1470 1471
        self._workerThread.logInfo(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1472

1473
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1474 1475 1476
        raise RuntimeError(
            "To be implemeted by child classes, class name: {}".format(
                self.__class__.__name__))
1477

1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494
    def _isErrAcceptable(self, errno, msg):
        if errno in [
                0x05,  # TSDB_CODE_RPC_NOT_READY
                # 0x200, # invalid SQL, TODO: re-examine with TD-934
                0x360, 0x362, 
                0x369, # tag already exists
                0x36A, 0x36B, 0x36D,
                0x381, 
                0x380, # "db not selected"
                0x383,
                0x386,  # DB is being dropped?!
                0x503,
                0x510,  # vnode not in ready state
                0x600,
                1000  # REST catch-all error
            ]: 
            return True # These are the ALWAYS-ACCEPTABLE ones
1495 1496
        elif (errno in [ 0x0B ]) and gConfig.auto_start_service:
            return True # We may get "network unavilable" when restarting service
1497 1498 1499
        elif errno == 0x200 : # invalid SQL, we need to div in a bit more
            if msg.find("invalid column name") != -1:
                return True 
1500 1501 1502 1503
            elif msg.find("tags number not matched") != -1: # mismatched tags after modification
                return True
            elif msg.find("duplicated column names") != -1: # also alter table tag issues
                return True
S
Steven Li 已提交
1504 1505 1506
        elif (gSvcMgr!=None) and gSvcMgr.isRestarting():
            logger.info("Ignoring error when service is restarting: errno = {}, msg = {}".format(errno, msg))
            return True
1507 1508 1509 1510
        
        return False # Not an acceptable error


1511 1512
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
S
Shuduo Sang 已提交
1513
        self._workerThread = wt  # type: ignore
1514 1515

        te = wt.getTaskExecutor()
1516
        self._curStep = te.getCurStep()
S
Shuduo Sang 已提交
1517 1518
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))
1519 1520

        self._err = None
1521 1522
        self._execStats.beginTaskType(self.__class__.__name__)  # mark beginning
        errno2 = None
1523
        try:
S
Shuduo Sang 已提交
1524
            self._executeInternal(te, wt)  # TODO: no return value?
1525
        except taos.error.ProgrammingError as err:
1526
            errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno  # correct error scheme
1527
            if (gConfig.continue_on_exception):  # user choose to continue
1528
                self.logDebug("[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1529
                        errno2, err, wt.getDbConn().getLastSql()))
1530
                self._err = err
1531 1532
            elif self._isErrAcceptable(errno2, err.__str__()):
                self.logDebug("[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1533
                        errno2, err, wt.getDbConn().getLastSql()))
1534
                print("_", end="", flush=True)
S
Shuduo Sang 已提交
1535
                self._err = err
1536
            else: # not an acceptable error
1537 1538 1539
                errMsg = "[=] Unexpected Taos library exception ({}): errno=0x{:X}, msg: {}, SQL: {}".format(
                    self.__class__.__name__,
                    errno2, err, wt.getDbConn().getLastSql())
1540
                self.logDebug(errMsg)
S
Shuduo Sang 已提交
1541
                if gConfig.debug:
1542 1543
                    # raise # so that we see full stack
                    traceback.print_exc()
1544 1545
                print(
                    "\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
1546 1547 1548 1549
                    "----------------------------\n")
                # sys.exit(-1)
                self._err = err
                self._aborted = True
S
Shuduo Sang 已提交
1550
        except Exception as e:
S
Steven Li 已提交
1551
            self.logInfo("Non-TAOS exception encountered")
S
Shuduo Sang 已提交
1552
            self._err = e
S
Steven Li 已提交
1553
            self._aborted = True
1554
            traceback.print_exc()
1555
        except BaseException as e:
1556
            self.logInfo("Python base exception encountered")
1557
            self._err = e
1558
            self._aborted = True
S
Steven Li 已提交
1559
            traceback.print_exc()
S
Shuduo Sang 已提交
1560 1561 1562
        except BaseException:
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
1563
                    wt.getDbConn().getLastSql()))
1564
            raise
1565
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
S
Shuduo Sang 已提交
1566 1567 1568 1569

        self.logDebug("[X] task execution completed, {}, status: {}".format(
            self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        # TODO: merge with above.
1570
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess(), errno2)
S
Steven Li 已提交
1571

1572
    def execSql(self, sql):
1573
        return self._dbManager.execute(sql)
1574

S
Shuduo Sang 已提交
1575
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1576 1577
        return wt.execSql(sql)

S
Shuduo Sang 已提交
1578
    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1579 1580
        return wt.querySql(sql)

S
Shuduo Sang 已提交
1581
    def getQueryResult(self, wt: WorkerThread):  # execute an SQL on the worker thread
1582 1583 1584
        return wt.getQueryResult()


1585
class ExecutionStats:
1586
    def __init__(self):
S
Shuduo Sang 已提交
1587 1588
        # total/success times for a task
        self._execTimes: Dict[str, [int, int]] = {}
1589 1590 1591
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1592
        self._execStartTime = None
1593
        self._errors = {}
S
Shuduo Sang 已提交
1594 1595
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time
1596

1597 1598 1599
        self._failed = False
        self._failureReason = None

S
Steven Li 已提交
1600
    def __str__(self):
S
Shuduo Sang 已提交
1601 1602
        return "[ExecStats: _failed={}, _failureReason={}".format(
            self._failed, self._failureReason)
S
Steven Li 已提交
1603 1604

    def isFailed(self):
S
Shuduo Sang 已提交
1605
        return self._failed
S
Steven Li 已提交
1606

1607 1608 1609 1610 1611 1612
    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

1613
    def incExecCount(self, klassName, isSuccess, eno=None):  # TODO: add a lock here
1614 1615
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
S
Shuduo Sang 已提交
1616 1617
        t = self._execTimes[klassName]  # tuple for the data
        t[0] += 1  # index 0 has the "total" execution times
1618
        if isSuccess:
S
Shuduo Sang 已提交
1619
            t[1] += 1  # index 1 has the "success" execution times
1620 1621 1622 1623 1624
        if eno != None:             
            if klassName not in self._errors:
                self._errors[klassName] = {}
            errors = self._errors[klassName]
            errors[eno] = errors[eno]+1 if eno in errors else 1
1625 1626 1627

    def beginTaskType(self, klassName):
        with self._lock:
S
Shuduo Sang 已提交
1628 1629
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
1630 1631 1632 1633 1634
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
S
Shuduo Sang 已提交
1635
            if self._tasksInProgress == 0:  # all tasks have stopped
1636 1637 1638
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1639 1640 1641 1642
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1643
    def printStats(self):
S
Shuduo Sang 已提交
1644 1645 1646 1647 1648 1649
        logger.info(
            "----------------------------------------------------------------------")
        logger.info(
            "| Crash_Gen test {}, with the following stats:". format(
                "FAILED (reason: {})".format(
                    self._failureReason) if self._failed else "SUCCEEDED"))
1650
        logger.info("| Task Execution Times (success/total):")
1651
        execTimesAny = 0
S
Shuduo Sang 已提交
1652
        for k, n in self._execTimes.items():
1653
            execTimesAny += n[0]
1654 1655 1656 1657 1658 1659 1660 1661
            errStr = None
            if k in self._errors:
                errors = self._errors[k]
                # print("errors = {}".format(errors))
                errStrs = ["0x{:X}:{}".format(eno, n) for (eno, n) in errors.items()]
                # print("error strings = {}".format(errStrs))
                errStr = ", ".join(errStrs) 
            logger.info("|    {0:<24}: {1}/{2} (Errors: {3})".format(k, n[1], n[0], errStr))
S
Shuduo Sang 已提交
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680

        logger.info(
            "| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info(
            "| Total Tasks In Progress at End: {}".format(
                self._tasksInProgress))
        logger.info(
            "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(
                self._accRunTime))
        logger.info(
            "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny))
        logger.info(
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
        logger.info(
            "| Top numbers written: {}".format(
                TaskExecutor.getBoundedList()))
        logger.info(
            "----------------------------------------------------------------------")
1681 1682 1683


class StateTransitionTask(Task):
1684 1685 1686 1687 1688
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1689
    @classmethod
S
Shuduo Sang 已提交
1690
    def getInfo(cls):  # each sub class should supply their own information
1691 1692
        raise RuntimeError("Overriding method expected")

S
Shuduo Sang 已提交
1693
    _endState = None
1694
    @classmethod
S
Shuduo Sang 已提交
1695
    def getEndState(cls):  # TODO: optimize by calling it fewer times
1696 1697
        raise RuntimeError("Overriding method expected")

1698 1699 1700
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1701

1702 1703 1704
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1705 1706

    @classmethod
1707 1708 1709
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1710

1711 1712
    @classmethod
    def getRegTableName(cls, i):
1713
        return "reg_table_{}".format(i)
1714

1715 1716
    def execute(self, wt: WorkerThread):
        super().execute(wt)
S
Shuduo Sang 已提交
1717 1718


1719
class TaskCreateDb(StateTransitionTask):
1720
    @classmethod
1721
    def getEndState(cls):
S
Shuduo Sang 已提交
1722
        return StateDbOnly()
1723

1724 1725 1726 1727
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1728
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1729 1730
        self.execWtSql(wt, "create database db")

1731

1732
class TaskDropDb(StateTransitionTask):
1733
    @classmethod
1734 1735
    def getEndState(cls):
        return StateEmpty()
1736

1737 1738 1739 1740
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1741
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1742
        self.execWtSql(wt, "drop database db")
S
Steven Li 已提交
1743
        logger.debug("[OPS] database dropped at {}".format(time.time()))
1744

S
Shuduo Sang 已提交
1745

1746
class TaskCreateSuperTable(StateTransitionTask):
1747
    @classmethod
1748 1749
    def getEndState(cls):
        return StateSuperTableOnly()
1750

1751 1752
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1753
        return state.canCreateFixedSuperTable()
1754

1755
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1756
        if not wt.dbInUse():  # no DB yet, to the best of our knowledge
1757 1758 1759
            logger.debug("Skipping task, no DB yet")
            return

1760
        sTable = self._dbManager.getFixedSuperTable()
1761
        # wt.execSql("use db")    # should always be in place
1762 1763
        sTable.create(wt.getDbConn(), {'ts':'timestamp', 'speed':'int'}, {'b':'binary(200)', 'f':'float'})
        # self.execWtSql(wt,"create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
S
Shuduo Sang 已提交
1764 1765
        # No need to create the regular tables, INSERT will do that
        # automatically
1766

S
Steven Li 已提交
1767

1768 1769 1770 1771
class TdSuperTable:
    def __init__(self, stName):
        self._stName = stName

1772 1773 1774 1775 1776 1777 1778 1779
    def create(self, dbc, cols: dict, tags: dict):
        sql = "CREATE TABLE db.{} ({}) TAGS ({})".format(
            self._stName,
            ",".join(['%s %s'%(k,v) for (k,v) in cols.items()]),
            ",".join(['%s %s'%(k,v) for (k,v) in tags.items()])
            )
        dbc.execute(sql)        

1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797
    def getRegTables(self, dbc: DbConn):
        try:
            dbc.query("select TBNAME from db.{}".format(self._stName))  # TODO: analyze result set later            
        except taos.error.ProgrammingError as err:                    
            errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno
            logger.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err))
            raise

        qr = dbc.getQueryResult()
        return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation

    def hasRegTables(self, dbc: DbConn):
        return dbc.query("SELECT * FROM db.{}".format(self._stName)) > 0

    def ensureTable(self, dbc: DbConn, regTableName: str):
        sql = "select tbname from {} where tbname in ('{}')".format(self._stName, regTableName)
        if dbc.query(sql) >= 1 : # reg table exists already
            return
1798 1799
        sql = "CREATE TABLE {} USING {} tags ({})".format(
            regTableName, self._stName, self._getTagStrForSql(dbc)
1800 1801 1802
        )
        dbc.execute(sql)

1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847
    def _getTagStrForSql(self, dbc) :
        tags = self._getTags(dbc)
        tagStrs = []
        for tagName in tags: 
            tagType = tags[tagName]
            if tagType == 'BINARY':
                tagStrs.append("'Beijing-Shanghai-LosAngeles'")
            elif tagType == 'FLOAT':
                tagStrs.append('9.9')
            elif tagType == 'INT':
                tagStrs.append('88')
            else:
                raise RuntimeError("Unexpected tag type: {}".format(tagType))
        return ", ".join(tagStrs)

    def _getTags(self, dbc) -> dict:
        dbc.query("DESCRIBE {}".format(self._stName))
        stCols = dbc.getQueryResult()
        # print(stCols)
        ret = {row[0]:row[1] for row in stCols if row[3]=='TAG'} # name:type
        # print("Tags retrieved: {}".format(ret))
        return ret

    def addTag(self, dbc, tagName, tagType):
        if tagName in self._getTags(dbc): # already 
            return
        # sTable.addTag("extraTag", "int")
        sql = "alter table db.{} add tag {} {}".format(self._stName, tagName, tagType)
        dbc.execute(sql)

    def dropTag(self, dbc, tagName):
        if not tagName in self._getTags(dbc): # don't have this tag
            return
        sql = "alter table db.{} drop tag {}".format(self._stName, tagName)
        dbc.execute(sql)

    def changeTag(self, dbc, oldTag, newTag):
        tags = self._getTags(dbc)
        if not oldTag in tags: # don't have this tag
            return
        if newTag in tags: # already have this tag
            return
        sql = "alter table db.{} change tag {} {}".format(self._stName, oldTag, newTag)
        dbc.execute(sql)

1848
class TaskReadData(StateTransitionTask):
1849
    @classmethod
1850
    def getEndState(cls):
S
Shuduo Sang 已提交
1851
        return None  # meaning doesn't affect state
1852

1853 1854 1855 1856
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1857
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1858
        sTable = self._dbManager.getFixedSuperTable()
1859

S
Shuduo Sang 已提交
1860 1861
        if random.randrange(
                5) == 0:  # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
1862 1863
            wt.getDbConn().close()
            wt.getDbConn().open()
1864
        
1865
        for rTbName in sTable.getRegTables(wt.getDbConn()):  # regular tables
1866 1867 1868 1869 1870 1871 1872 1873
            aggExpr = Dice.choice(['*', 'count(*)', 'avg(speed)', 
                # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable
                'sum(speed)', 'stddev(speed)', 
                'min(speed)', 'max(speed)', 'first(speed)', 'last(speed)']) # TODO: add more from 'top'
            try:
                self.execWtSql(wt, "select {} from db.{}".format(aggExpr, rTbName))
            except taos.error.ProgrammingError as err:                    
                errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno
1874
                logger.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, wt.getDbConn().getLastSql()))
1875
                raise
S
Shuduo Sang 已提交
1876

1877
class TaskDropSuperTable(StateTransitionTask):
1878
    @classmethod
1879
    def getEndState(cls):
S
Shuduo Sang 已提交
1880
        return StateDbOnly()
1881

1882 1883
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1884
        return state.canDropFixedSuperTable()
1885

1886
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1887 1888 1889 1890 1891 1892 1893
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized
        # sequence
        if Dice.throw(2) == 0:
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
1894
            isSuccess = True
S
Shuduo Sang 已提交
1895 1896 1897
            for i in tblSeq:
                regTableName = self.getRegTableName(
                    i)  # "db.reg_table_{}".format(i)
1898
                try:
S
Shuduo Sang 已提交
1899 1900 1901 1902 1903 1904 1905
                    self.execWtSql(wt, "drop table {}".format(
                        regTableName))  # nRows always 0, like MySQL
                except taos.error.ProgrammingError as err:
                    # correcting for strange error number scheme
                    errno2 = err.errno if (
                        err.errno > 0) else 0x80000000 + err.errno
                    if (errno2 in [0x362]):  # mnode invalid table name
1906
                        isSuccess = False
S
Shuduo Sang 已提交
1907 1908 1909
                        logger.debug(
                            "[DB] Acceptable error when dropping a table")
                    continue  # try to delete next regular table
1910 1911

                if (not tickOutput):
S
Shuduo Sang 已提交
1912 1913
                    tickOutput = True  # Print only one time
                    if isSuccess:
1914 1915
                        print("d", end="", flush=True)
                    else:
S
Shuduo Sang 已提交
1916
                        print("f", end="", flush=True)
1917 1918

        # Drop the super table itself
S
Shuduo Sang 已提交
1919
        tblName = self._dbManager.getFixedSuperTableName()
1920
        self.execWtSql(wt, "drop table db.{}".format(tblName))
1921

S
Shuduo Sang 已提交
1922

1923 1924 1925
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
S
Shuduo Sang 已提交
1926
        return None  # meaning doesn't affect state
1927 1928 1929

    @classmethod
    def canBeginFrom(cls, state: AnyState):
S
Shuduo Sang 已提交
1930
        return state.canDropFixedSuperTable()  # if we can drop it, we can alter tags
1931 1932

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1933 1934 1935
        # tblName = self._dbManager.getFixedSuperTableName()
        dbc = wt.getDbConn()
        sTable = self._dbManager.getFixedSuperTable()
1936
        dice = Dice.throw(4)
S
Shuduo Sang 已提交
1937
        if dice == 0:
1938 1939
            sTable.addTag(dbc, "extraTag", "int")
            # sql = "alter table db.{} add tag extraTag int".format(tblName)
S
Shuduo Sang 已提交
1940
        elif dice == 1:
1941 1942
            sTable.dropTag(dbc, "extraTag")
            # sql = "alter table db.{} drop tag extraTag".format(tblName)
S
Shuduo Sang 已提交
1943
        elif dice == 2:
1944 1945
            sTable.dropTag(dbc, "newTag")
            # sql = "alter table db.{} drop tag newTag".format(tblName)
S
Shuduo Sang 已提交
1946
        else:  # dice == 3
1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963
            sTable.changeTag(dbc, "extraTag", "newTag")
            # sql = "alter table db.{} change tag extraTag newTag".format(tblName)

class TaskRestartService(StateTransitionTask):
    _isRunning = False
    _classLock = threading.Lock()

    @classmethod
    def getEndState(cls):
        return None  # meaning doesn't affect state

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        if gConfig.auto_start_service:
            return state.canDropFixedSuperTable()  # Basicallly when we have the super table
        return False # don't run this otherwise

1964
    CHANCE_TO_RESTART_SERVICE = 100
1965 1966 1967 1968
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        if not gConfig.auto_start_service: # only execute when we are in -a mode
            print("_a", end="", flush=True)
            return
1969

1970 1971 1972 1973 1974 1975
        with self._classLock:
            if self._isRunning:
                print("Skipping restart task, another running already")
                return
            self._isRunning = True

1976
        if Dice.throw(self.CHANCE_TO_RESTART_SERVICE) == 0: # 1 in N chance
1977 1978 1979
            dbc = wt.getDbConn()
            dbc.execute("show databases") # simple delay, align timing with other workers
            gSvcMgr.restart()
1980

1981
        self._isRunning = False
S
Shuduo Sang 已提交
1982

1983
class TaskAddData(StateTransitionTask):
S
Shuduo Sang 已提交
1984 1985
    # Track which table is being actively worked on
    activeTable: Set[int] = set()
1986

S
Shuduo Sang 已提交
1987 1988
    # We use these two files to record operations to DB, useful for power-off
    # tests
1989 1990 1991 1992 1993
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
S
Shuduo Sang 已提交
1994 1995 1996 1997
        if gConfig.record_ops:
            if (cls.fAddLogReady is None):
                logger.info(
                    "Recording in a file operations to be performed...")
1998
                cls.fAddLogReady = open("add_log_ready.txt", "w")
S
Shuduo Sang 已提交
1999
            if (cls.fAddLogDone is None):
2000 2001
                logger.info("Recording in a file operations completed...")
                cls.fAddLogDone = open("add_log_done.txt", "w")
2002

2003
    @classmethod
2004 2005
    def getEndState(cls):
        return StateHasData()
2006 2007 2008 2009

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
S
Shuduo Sang 已提交
2010

2011
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
2012
        ds = self._dbManager # Quite DANGEROUS here, may result in multi-thread client access
2013
        tblSeq = list(range(
S
Shuduo Sang 已提交
2014 2015 2016 2017
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
        for i in tblSeq:
            if (i in self.activeTable):  # wow already active
2018
                print("x", end="", flush=True) # concurrent insertion
2019
            else:
S
Shuduo Sang 已提交
2020
                self.activeTable.add(i)  # marking it active
2021 2022 2023
            
            sTable = ds.getFixedSuperTable()
            regTableName = self.getRegTableName(i)  # "db.reg_table_{}".format(i)
2024
            sTable.ensureTable(wt.getDbConn(), regTableName)  # Ensure the table exists           
2025 2026
           
            for j in range(self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
S
Shuduo Sang 已提交
2027
                nextInt = ds.getNextInt()
2028 2029
                if gConfig.record_ops:
                    self.prepToRecordOps()
2030
                    self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName))
2031 2032
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
2033
                sql = "insert into {} values ('{}', {});".format( # removed: tags ('{}', {})
S
Shuduo Sang 已提交
2034
                    regTableName,
2035 2036
                    # ds.getFixedSuperTableName(),
                    # ds.getNextBinary(), ds.getNextFloat(),
2037
                    ds.getNextTick(), nextInt)
S
Shuduo Sang 已提交
2038 2039 2040
                self.execWtSql(wt, sql)
                # Successfully wrote the data into the DB, let's record it
                # somehow
2041
                te.recordDataMark(nextInt)
2042
                if gConfig.record_ops:
S
Shuduo Sang 已提交
2043 2044 2045
                    self.fAddLogDone.write(
                        "Wrote {} to {}\n".format(
                            nextInt, regTableName))
2046 2047
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
S
Shuduo Sang 已提交
2048
            self.activeTable.discard(i)  # not raising an error, unlike remove
2049 2050


S
Steven Li 已提交
2051 2052
# Deterministic random number generator
class Dice():
S
Shuduo Sang 已提交
2053
    seeded = False  # static, uninitialized
S
Steven Li 已提交
2054 2055

    @classmethod
S
Shuduo Sang 已提交
2056
    def seed(cls, s):  # static
S
Steven Li 已提交
2057
        if (cls.seeded):
S
Shuduo Sang 已提交
2058 2059
            raise RuntimeError(
                "Cannot seed the random generator more than once")
S
Steven Li 已提交
2060 2061 2062 2063 2064
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
S
Shuduo Sang 已提交
2065
    def verifyRNG(cls):  # Verify that the RNG is determinstic
S
Steven Li 已提交
2066 2067 2068 2069
        random.seed(0)
        x1 = random.randrange(0, 1000)
        x2 = random.randrange(0, 1000)
        x3 = random.randrange(0, 1000)
S
Shuduo Sang 已提交
2070
        if (x1 != 864 or x2 != 394 or x3 != 776):
S
Steven Li 已提交
2071 2072 2073
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
S
Shuduo Sang 已提交
2074
    def throw(cls, stop):  # get 0 to stop-1
2075
        return cls.throwRange(0, stop)
S
Steven Li 已提交
2076 2077

    @classmethod
S
Shuduo Sang 已提交
2078 2079
    def throwRange(cls, start, stop):  # up to stop-1
        if (not cls.seeded):
S
Steven Li 已提交
2080
            raise RuntimeError("Cannot throw dice before seeding it")
2081
        return random.randrange(start, stop)
S
Steven Li 已提交
2082

2083 2084 2085 2086
    @classmethod
    def choice(cls, cList):
        return random.choice(cList)

S
Steven Li 已提交
2087

S
Steven Li 已提交
2088 2089
class LoggingFilter(logging.Filter):
    def filter(self, record: logging.LogRecord):
S
Shuduo Sang 已提交
2090 2091
        if (record.levelno >= logging.INFO):
            return True  # info or above always log
S
Steven Li 已提交
2092

S
Steven Li 已提交
2093 2094 2095 2096
        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
S
Steven Li 已提交
2097 2098
        return True

S
Shuduo Sang 已提交
2099 2100

class MyLoggingAdapter(logging.LoggerAdapter):
2101 2102 2103 2104
    def process(self, msg, kwargs):
        return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
        # return '[%s] %s' % (self.extra['connid'], msg), kwargs

S
Shuduo Sang 已提交
2105 2106

class SvcManager:
2107
    def __init__(self):
2108
        print("Starting TDengine Service Manager")
2109 2110 2111
        # signal.signal(signal.SIGTERM, self.sigIntHandler) # Moved to MainExec
        # signal.signal(signal.SIGINT, self.sigIntHandler)
        # signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
2112

2113
        self.inSigHandler = False
2114 2115
        # self._status = MainExec.STATUS_RUNNING # set inside
        # _startTaosService()
2116
        self.svcMgrThread = None
2117 2118
        self._lock = threading.Lock()
        self._isRestarting = False
2119

2120 2121 2122 2123 2124 2125 2126 2127
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Service Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Restart")
            # Remember to update the if range below
S
Shuduo Sang 已提交
2128
            # print("Enter Choice: ", end="", flush=True)
2129 2130 2131
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
S
Shuduo Sang 已提交
2132 2133 2134
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
2135
            print("Invalid choice, please try again.")
S
Shuduo Sang 已提交
2136
            choice = ""  # reset
2137 2138
        return choice

S
Shuduo Sang 已提交
2139
    def sigUsrHandler(self, signalNumber, frame):
2140
        print("Interrupting main thread execution upon SIGUSR1")
2141
        if self.inSigHandler:  # already
2142
            print("Ignoring repeated SIG...")
S
Shuduo Sang 已提交
2143
            return  # do nothing if it's already not running
2144
        self.inSigHandler = True
2145 2146

        choice = self._doMenu()
S
Shuduo Sang 已提交
2147 2148 2149 2150 2151
        if choice == "1":
            # TODO: can the sub-process be blocked due to us not reading from
            # queue?
            self.sigHandlerResume()
        elif choice == "2":
2152
            self.stopTaosService()
2153 2154
        elif choice == "3": # Restart
            self.restart()
2155 2156
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))
2157

2158 2159
        self.inSigHandler = False

2160
    def sigIntHandler(self, signalNumber, frame):
2161
        print("SvcManager: INT Signal Handler starting...")
2162
        if self.inSigHandler:
2163 2164
            print("Ignoring repeated SIG_INT...")
            return
2165
        self.inSigHandler = True
2166

S
Shuduo Sang 已提交
2167
        self.stopTaosService()
2168
        print("SvcManager: INT Signal Handler returning...")
2169
        self.inSigHandler = False
2170

S
Shuduo Sang 已提交
2171
    def sigHandlerResume(self):
2172
        print("Resuming TDengine service manager thread (main thread)...\n\n")
2173

2174
    def _checkServiceManagerThread(self):
2175 2176 2177 2178
        if self.svcMgrThread:  # valid svc mgr thread
            if self.svcMgrThread.isStopped():  # done?
                self.svcMgrThread.procIpcBatch()  # one last time. TODO: appropriate?
                self.svcMgrThread = None  # no more
2179 2180

    def _procIpcAll(self):
2181 2182 2183 2184 2185 2186
        while self.isRunning() or self.isRestarting() :  # for as long as the svc mgr thread is still here
            if self.isRunning():
                self.svcMgrThread.procIpcBatch()  # regular processing,
                self._checkServiceManagerThread()
            elif self.isRetarting():
                print("Service restarting...")
2187 2188 2189 2190 2191
            time.sleep(0.5)  # pause, before next round
        print(
            "Service Manager Thread (with subprocess) has ended, main thread now exiting...")

    def startTaosService(self):
2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205
        with self._lock:
            if self.svcMgrThread:
                raise RuntimeError("Cannot start TAOS service when one may already be running")

            # Find if there's already a taosd service, and then kill it
            for proc in psutil.process_iter():
                if proc.name() == 'taosd':
                    print("Killing an existing TAOSD process in 2 seconds... press CTRL-C to interrupe")
                    time.sleep(2.0)
                    proc.kill()
                # print("Process: {}".format(proc.name()))
            
            self.svcMgrThread = ServiceManagerThread()  # create the object
            self.svcMgrThread.start()
S
Steven Li 已提交
2206
            print("Attempting to start TAOS service started, printing out output...")
2207 2208 2209 2210
            self.svcMgrThread.procIpcBatch(
                trimToTarget=10,
                forceOutput=True)  # for printing 10 lines
            print("TAOS service started")
2211 2212

    def stopTaosService(self, outputLines=20):
2213 2214 2215 2216
        with self._lock:
            if not self.isRunning():
                logger.warning("Cannot stop TAOS service, not running")
                return
2217

2218 2219 2220 2221 2222 2223 2224 2225 2226
            print("Terminating Service Manager Thread (SMT) execution...")
            self.svcMgrThread.stop()
            if self.svcMgrThread.isStopped():
                self.svcMgrThread.procIpcBatch(outputLines)  # one last time
                self.svcMgrThread = None
                print("----- End of TDengine Service Output -----\n")
                print("SMT execution terminated")
            else:
                print("WARNING: SMT did not terminate as expected")
2227 2228 2229

    def run(self):
        self.startTaosService()
2230
        self._procIpcAll()  # pump/process all the messages, may encounter SIG + restart
2231
        if self.isRunning():  # if sig handler hasn't destroyed it by now
2232 2233
            self.stopTaosService()  # should have started already

2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247
    def restart(self):
        if self._isRestarting:
            logger.warning("Cannot restart service when it's already restarting")
            return

        self._isRestarting = True
        if self.isRunning():
            self.stopTaosService()
        else:
            logger.warning("Service not running when restart requested")

        self.startTaosService()
        self._isRestarting = False

2248 2249
    def isRunning(self):
        return self.svcMgrThread != None
2250

2251 2252 2253
    def isRestarting(self):
        return self._isRestarting

2254 2255 2256 2257 2258
class ServiceManagerThread:
    MAX_QUEUE_SIZE = 10000

    def __init__(self):
        self._tdeSubProcess = None
2259
        self._thread = None
2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276
        self._status = None

    def getStatus(self):
        return self._status

    def isRunning(self):
        # return self._thread and self._thread.is_alive()
        return self._status == MainExec.STATUS_RUNNING

    def isStopping(self):
        return self._status == MainExec.STATUS_STOPPING

    def isStopped(self):
        return self._status == MainExec.STATUS_STOPPED

    # Start the thread (with sub process), and wait for the sub service
    # to become fully operational
2277 2278
    def start(self):
        if self._thread:
2279
            raise RuntimeError("Unexpected _thread")
2280
        if self._tdeSubProcess:
2281 2282 2283 2284
            raise RuntimeError("TDengine sub process already created/running")

        self._status = MainExec.STATUS_STARTING

2285
        self._tdeSubProcess = TdeSubProcess()
2286 2287 2288 2289
        self._tdeSubProcess.start()

        self._ipcQueue = Queue()
        self._thread = threading.Thread(
2290
            target=self.svcOutputReader,
2291
            args=(self._tdeSubProcess.getStdOut(), self._ipcQueue))
2292
        self._thread.daemon = True  # thread dies with the program
2293 2294 2295
        self._thread.start()

        # wait for service to start
2296
        for i in range(0, 10):
2297 2298 2299
            time.sleep(1.0)
            # self.procIpcBatch() # don't pump message during start up
            print("_zz_", end="", flush=True)
2300
            if self._status == MainExec.STATUS_RUNNING:
2301
                logger.info("[] TDengine service READY to process requests")
2302 2303
                return  # now we've started
        # TODO: handle this better?
2304
        self.procIpcBatch(20, True) # display output before cronking out, trim to last 20 msgs, force output
2305
        raise RuntimeError("TDengine service did not start successfully")
2306 2307 2308 2309 2310 2311

    def stop(self):
        # can be called from both main thread or signal handler
        print("Terminating TDengine service running as the sub process...")
        if self.isStopped():
            print("Service already stopped")
2312
            return
2313 2314 2315
        if self.isStopping():
            print("Service is already being stopped")
            return
2316 2317 2318 2319
        # Linux will send Control-C generated SIGINT to the TDengine process
        # already, ref:
        # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes
        if not self._tdeSubProcess:
2320
            raise RuntimeError("sub process object missing")
2321

2322 2323 2324
        self._status = MainExec.STATUS_STOPPING
        self._tdeSubProcess.stop()

2325 2326 2327 2328
        if self._tdeSubProcess.isRunning():  # still running
            print(
                "FAILED to stop sub process, it is still running... pid = {}".format(
                    self.subProcess.pid))
2329
        else:
2330 2331 2332
            self._tdeSubProcess = None  # not running any more
            self.join()  # stop the thread, change the status, etc.

2333 2334 2335
    def join(self):
        # TODO: sanity check
        if not self.isStopping():
2336 2337 2338
            raise RuntimeError(
                "Unexpected status when ending svc mgr thread: {}".format(
                    self._status))
2339

2340
        if self._thread:
2341
            self._thread.join()
2342
            self._thread = None
2343
            self._status = MainExec.STATUS_STOPPED
S
Shuduo Sang 已提交
2344
        else:
2345
            print("Joining empty thread, doing nothing")
2346 2347 2348

    def _trimQueue(self, targetSize):
        if targetSize <= 0:
2349
            return  # do nothing
2350
        q = self._ipcQueue
2351
        if (q.qsize() <= targetSize):  # no need to trim
2352 2353 2354 2355
            return

        logger.debug("Triming IPC queue to target size: {}".format(targetSize))
        itemsToTrim = q.qsize() - targetSize
2356
        for i in range(0, itemsToTrim):
2357 2358 2359
            try:
                q.get_nowait()
            except Empty:
2360 2361
                break  # break out of for loop, no more trimming

2362
    TD_READY_MSG = "TDengine is initialized successfully"
S
Shuduo Sang 已提交
2363

2364 2365
    def procIpcBatch(self, trimToTarget=0, forceOutput=False):
        self._trimQueue(trimToTarget)  # trim if necessary
S
Shuduo Sang 已提交
2366 2367
        # Process all the output generated by the underlying sub process,
        # managed by IO thread
2368
        print("<", end="", flush=True)
S
Shuduo Sang 已提交
2369 2370
        while True:
            try:
2371
                line = self._ipcQueue.get_nowait()  # getting output at fast speed
2372
                self._printProgress("_o")
2373 2374 2375
            except Empty:
                # time.sleep(2.3) # wait only if there's no output
                # no more output
2376
                print(".>", end="", flush=True)
S
Shuduo Sang 已提交
2377
                return  # we are done with THIS BATCH
2378
            else:  # got line, printing out
2379 2380 2381 2382 2383 2384 2385
                if forceOutput:
                    logger.info(line)
                else:
                    logger.debug(line)
        print(">", end="", flush=True)

    _ProgressBars = ["--", "//", "||", "\\\\"]
2386

2387 2388
    def _printProgress(self, msg):  # TODO: assuming 2 chars
        print(msg, end="", flush=True)
2389 2390 2391
        pBar = self._ProgressBars[Dice.throw(4)]
        print(pBar, end="", flush=True)
        print('\b\b\b\b', end="", flush=True)
2392

2393 2394 2395
    def svcOutputReader(self, out: IO, queue):
        # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # print("This is the svcOutput Reader...")
2396
        # for line in out :
2397 2398 2399 2400
        for line in iter(out.readline, b''):
            # print("Finished reading a line: {}".format(line))
            # print("Adding item to queue...")
            line = line.decode("utf-8").rstrip()
2401 2402
            # This might block, and then causing "out" buffer to block
            queue.put(line)
2403 2404
            self._printProgress("_i")

2405 2406
            if self._status == MainExec.STATUS_STARTING:  # we are starting, let's see if we have started
                if line.find(self.TD_READY_MSG) != -1:  # found
S
Steven Li 已提交
2407 2408 2409 2410
                    logger.info("Waiting for the service to become FULLY READY")
                    time.sleep(1.0) # wait for the server to truly start. TODO: remove this
                    logger.info("Service is now FULLY READY")   
                    self._status = MainExec.STATUS_RUNNING                 
2411 2412

            # Trim the queue if necessary: TODO: try this 1 out of 10 times
2413
            self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10)  # trim to 90% size
2414

2415 2416 2417
            if self.isStopping():  # TODO: use thread status instead
                # WAITING for stopping sub process to finish its outptu
                print("_w", end="", flush=True)
2418 2419

            # queue.put(line)
2420 2421
        # meaning sub process must have died
        print("\nNo more output from IO thread managing TDengine service")
2422 2423
        out.close()

2424 2425

class TdeSubProcess:
2426 2427 2428 2429 2430
    def __init__(self):
        self.subProcess = None

    def getStdOut(self):
        return self.subProcess.stdout
2431

2432
    def isRunning(self):
2433
        return self.subProcess is not None
2434

S
Shuduo Sang 已提交
2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath
2449

2450
    def start(self):
2451
        ON_POSIX = 'posix' in sys.builtin_module_names
S
Shuduo Sang 已提交
2452

2453 2454 2455
        taosdPath = self.getBuildPath() + "/build/bin/taosd"
        cfgPath = self.getBuildPath() + "/test/cfg"

2456 2457 2458
        # Delete the log files
        logPath = self.getBuildPath() + "/test/log"
        # ref: https://stackoverflow.com/questions/1995373/deleting-all-files-in-a-directory-with-python/1995397
2459 2460 2461 2462
        # filelist = [ f for f in os.listdir(logPath) ] # if f.endswith(".bak") ]
        # for f in filelist:
        #     filePath = os.path.join(logPath, f)
        #     print("Removing log file: {}".format(filePath))
2463 2464 2465 2466 2467 2468
        #     os.remove(filePath)        
        if os.path.exists(logPath):
            logPathSaved = logPath + "_" + time.strftime('%Y-%m-%d-%H-%M-%S')
            logger.info("Saving old log files to: {}".format(logPathSaved))
            os.rename(logPath, logPathSaved)
        # os.mkdir(logPath) # recreate, no need actually, TDengine will auto-create with proper perms
2469
            
2470

S
Shuduo Sang 已提交
2471
        svcCmd = [taosdPath, '-c', cfgPath]
2472
        # svcCmd = ['vmstat', '1']
S
Shuduo Sang 已提交
2473
        if self.subProcess:  # already there
2474 2475 2476
            raise RuntimeError("Corrupt process state")

        self.subProcess = subprocess.Popen(
S
Shuduo Sang 已提交
2477 2478
            svcCmd,
            stdout=subprocess.PIPE,
2479
            # bufsize=1, # not supported in binary mode
S
Shuduo Sang 已提交
2480
            close_fds=ON_POSIX)  # had text=True, which interferred with reading EOF
2481

2482 2483 2484
    def stop(self):
        if not self.subProcess:
            print("Sub process already stopped")
2485 2486 2487
            return

        retCode = self.subProcess.poll()
S
Shuduo Sang 已提交
2488
        if retCode:  # valid return code, process ended
2489
            self.subProcess = None
S
Shuduo Sang 已提交
2490 2491
        else:  # process still alive, let's interrupt it
            print(
2492
                "Sub process is running, sending SIG_INT and waiting for it to terminate...")
S
Shuduo Sang 已提交
2493 2494 2495 2496
            # sub process should end, then IPC queue should end, causing IO
            # thread to end
            self.subProcess.send_signal(signal.SIGINT)
            try:
2497 2498 2499 2500
                self.subProcess.wait(10)
            except subprocess.TimeoutExpired as err:
                print("Time out waiting for TDengine service process to exit")
            else:
2501
                print("TDengine service process terminated successfully from SIG_INT")
2502 2503
                self.subProcess = None

2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531
class ThreadStacks: # stack info for all threads
    def __init__(self):
        self._allStacks = {}
        allFrames = sys._current_frames()
        for th in threading.enumerate():                        
            stack = traceback.extract_stack(allFrames[th.ident])     
            self._allStacks[th.native_id] = stack

    def print(self, filteredEndName = None, filterInternal = False):
        for thNid, stack in self._allStacks.items(): # for each thread            
            lastFrame = stack[-1]
            if filteredEndName: # we need to filter out stacks that match this name                
                if lastFrame.name == filteredEndName : # end did not match
                    continue
            if filterInternal:
                if lastFrame.name in ['wait', 'invoke_excepthook', 
                    '_wait', # The Barrier exception
                    'svcOutputReader', # the svcMgr thread
                    '__init__']: # the thread that extracted the stack
                    continue # ignore
            # Now print
            print("\n<----- Thread Info for ID: {}".format(thNid))
            for frame in stack:
                # print(frame)
                print("File {filename}, line {lineno}, in {name}".format(
                    filename=frame.filename, lineno=frame.lineno, name=frame.name))
                print("    {}".format(frame.line))
            print("-----> End of Thread Info\n")
S
Shuduo Sang 已提交
2532

2533 2534 2535
class ClientManager:
    def __init__(self):
        print("Starting service manager")
2536 2537
        # signal.signal(signal.SIGTERM, self.sigIntHandler)
        # signal.signal(signal.SIGINT, self.sigIntHandler)
2538

2539
        self._status = MainExec.STATUS_RUNNING
2540 2541
        self.tc = None

2542 2543
        self.inSigHandler = False

2544
    def sigIntHandler(self, signalNumber, frame):
2545
        if self._status != MainExec.STATUS_RUNNING:
2546 2547 2548
            print("Repeated SIGINT received, forced exit...")
            # return  # do nothing if it's already not running
            sys.exit(-1)
2549
        self._status = MainExec.STATUS_STOPPING  # immediately set our status
2550

2551
        print("ClientManager: Terminating program...")
2552 2553
        self.tc.requestToStop()

2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Client Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Show Threads")
            # Remember to update the if range below
            # print("Enter Choice: ", end="", flush=True)
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
            print("Invalid choice, please try again.")
            choice = ""  # reset
        return choice

    def sigUsrHandler(self, signalNumber, frame):
        print("Interrupting main thread execution upon SIGUSR1")
        if self.inSigHandler:  # already
            print("Ignoring repeated SIG_USR1...")
            return  # do nothing if it's already not running
        self.inSigHandler = True

        choice = self._doMenu()
        if choice == "1":
            print("Resuming execution...")
            time.sleep(1.0)
        elif choice == "2":
            print("Not implemented yet")
            time.sleep(1.0)
        elif choice == "3":
            ts = ThreadStacks()
            ts.print()
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))

        self.inSigHandler = False

S
Shuduo Sang 已提交
2595
    def _printLastNumbers(self):  # to verify data durability
2596 2597
        dbManager = DbManager(resetDb=False)
        dbc = dbManager.getDbConn()
S
Shuduo Sang 已提交
2598
        if dbc.query("show databases") == 0:  # no databae
2599
            return
S
Shuduo Sang 已提交
2600
        if dbc.query("show tables") == 0:  # no tables
S
Steven Li 已提交
2601
            return
2602 2603

        dbc.execute("use db")
S
Shuduo Sang 已提交
2604
        sTbName = dbManager.getFixedSuperTableName()
2605 2606

        # get all regular tables
S
Shuduo Sang 已提交
2607 2608
        # TODO: analyze result set later
        dbc.query("select TBNAME from db.{}".format(sTbName))
2609 2610 2611
        rTables = dbc.getQueryResult()

        bList = TaskExecutor.BoundedList()
S
Shuduo Sang 已提交
2612
        for rTbName in rTables:  # regular tables
2613 2614
            dbc.query("select speed from db.{}".format(rTbName[0]))
            numbers = dbc.getQueryResult()
S
Shuduo Sang 已提交
2615
            for row in numbers:
2616 2617 2618 2619 2620 2621
                # print("<{}>".format(n), end="", flush=True)
                bList.add(row[0])

        print("Top numbers in DB right now: {}".format(bList))
        print("TDengine client execution is about to start in 2 seconds...")
        time.sleep(2.0)
S
Shuduo Sang 已提交
2622
        dbManager = None  # release?
2623 2624 2625 2626

    def prepare(self):
        self._printLastNumbers()

2627
    def run(self, svcMgr):    
2628 2629
        self._printLastNumbers()

S
Shuduo Sang 已提交
2630
        dbManager = DbManager()  # Regular function
2631
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
2632
        self.tc = ThreadCoordinator(thPool, dbManager)
2633
        
2634
        self.tc.run()
S
Steven Li 已提交
2635 2636
        # print("exec stats: {}".format(self.tc.getExecStats()))
        # print("TC failed = {}".format(self.tc.isFailed()))
2637
        if svcMgr: # gConfig.auto_start_service:
2638
            svcMgr.stopTaosService()
2639 2640
        # Print exec status, etc., AFTER showing messages from the server
        self.conclude()
S
Steven Li 已提交
2641
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2642 2643
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
        return 1 if self.tc.isFailed() else 0
2644 2645 2646

    def conclude(self):
        self.tc.printStats()
S
Shuduo Sang 已提交
2647
        self.tc.getDbManager().cleanUp()
2648 2649

class MainExec:
2650 2651 2652
    STATUS_STARTING = 1
    STATUS_RUNNING = 2
    STATUS_STOPPING = 3
2653
    STATUS_STOPPED = 4
2654

2655 2656 2657
    def __init__(self):        
        self._clientMgr = None
        self._svcMgr = None
2658

2659 2660 2661
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT,  self.sigIntHandler)
        signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
2662

2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675
    def sigUsrHandler(self, signalNumber, frame):
        if self._clientMgr:
            self._clientMgr.sigUsrHandler(signalNumber, frame)
        elif self._svcMgr: # Only if no client mgr, we are running alone
            self._svcMgr.sigUsrHandler(signalNumber, frame)
        
    def sigIntHandler(self, signalNumber, frame):
        if self._svcMgr:
            self._svcMgr.sigIntHandler(signalNumber, frame)
        if self._clientMgr:
            self._clientMgr.sigIntHandler(signalNumber, frame)

    def runClient(self):
2676
        global gSvcMgr
2677 2678
        if gConfig.auto_start_service:
            self._svcMgr = SvcManager()
2679
            gSvcMgr = self._svcMgr # hack alert
2680 2681 2682
            self._svcMgr.startTaosService() # we start, don't run
        
        self._clientMgr = ClientManager()
2683 2684 2685 2686 2687 2688 2689
        ret = None
        try: 
            ret = self._clientMgr.run(self._svcMgr) # stop TAOS service inside
        except requests.exceptions.ConnectionError as err:
            logger.warning("Failed to open REST connection to DB")
            # don't raise
        return ret
2690 2691

    def runService(self):
2692
        global gSvcMgr
2693
        self._svcMgr = SvcManager()
2694 2695
        gSvcMgr = self._svcMgr # save it in a global variable TODO: hack alert

2696
        self._svcMgr.run() # run to some end state
2697 2698
        self._svcMgr = None 
        gSvcMgr = None        
2699 2700

    def runTemp(self):  # for debugging purposes
2701 2702
        # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
S
Shuduo Sang 已提交
2703
        # sTbName = dbState.getFixedSuperTableName()
2704 2705
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
S
Shuduo Sang 已提交
2706
        #     dbc.execute("use db")
2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))
S
Shuduo Sang 已提交
2718

2719 2720 2721 2722 2723 2724 2725 2726
        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
S
Shuduo Sang 已提交
2727
        #                 rTbName[0],
2728 2729
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
S
Shuduo Sang 已提交
2730
        #         for rTbName in rTables : # regular tables
2731
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
S
Shuduo Sang 已提交
2732
        #         logger.info("Initial READING operation is successful")
2733
        #     except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
2734 2735
        #         logger.info("Initial WRITE/READ error: {}".format(err))

2736 2737 2738
        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
S
Shuduo Sang 已提交
2739
        #     rows = dbc.query("show databases")
2740
        #     print("Rows: {}, time={}".format(rows, time.time()))
S
Shuduo Sang 已提交
2741 2742
        return

S
Steven Li 已提交
2743

2744
def main():
S
Shuduo Sang 已提交
2745 2746
    # Super cool Python argument library:
    # https://docs.python.org/3/library/argparse.html
2747 2748 2749 2750 2751 2752 2753 2754 2755
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
2756

2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777
    # parser.add_argument('-a', '--auto-start-service', action='store_true',                        
    #                     help='Automatically start/stop the TDengine service (default: false)')
    # parser.add_argument('-c', '--connector-type', action='store', default='native', type=str,
    #                     help='Connector type to use: native, rest, or mixed (default: 10)')
    # parser.add_argument('-d', '--debug', action='store_true',                        
    #                     help='Turn on DEBUG mode for more logging (default: false)')
    # parser.add_argument('-e', '--run-tdengine', action='store_true',                        
    #                     help='Run TDengine service in foreground (default: false)')
    # parser.add_argument('-l', '--larger-data', action='store_true',                        
    #                     help='Write larger amount of data during write operations (default: false)')
    # parser.add_argument('-p', '--per-thread-db-connection', action='store_true',                        
    #                     help='Use a single shared db connection (default: false)')
    # parser.add_argument('-r', '--record-ops', action='store_true',                        
    #                     help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')                    
    # parser.add_argument('-s', '--max-steps', action='store', default=1000, type=int,
    #                     help='Maximum number of steps to run (default: 100)')
    # parser.add_argument('-t', '--num-threads', action='store', default=5, type=int,
    #                     help='Number of threads to run (default: 10)')
    # parser.add_argument('-x', '--continue-on-exception', action='store_true',                        
    #                     help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')                        

S
Shuduo Sang 已提交
2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828
    parser.add_argument(
        '-a',
        '--auto-start-service',
        action='store_true',
        help='Automatically start/stop the TDengine service (default: false)')
    parser.add_argument(
        '-c',
        '--connector-type',
        action='store',
        default='native',
        type=str,
        help='Connector type to use: native, rest, or mixed (default: 10)')
    parser.add_argument(
        '-d',
        '--debug',
        action='store_true',
        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument(
        '-e',
        '--run-tdengine',
        action='store_true',
        help='Run TDengine service in foreground (default: false)')
    parser.add_argument(
        '-l',
        '--larger-data',
        action='store_true',
        help='Write larger amount of data during write operations (default: false)')
    parser.add_argument(
        '-p',
        '--per-thread-db-connection',
        action='store_true',
        help='Use a single shared db connection (default: false)')
    parser.add_argument(
        '-r',
        '--record-ops',
        action='store_true',
        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
    parser.add_argument(
        '-s',
        '--max-steps',
        action='store',
        default=1000,
        type=int,
        help='Maximum number of steps to run (default: 100)')
    parser.add_argument(
        '-t',
        '--num-threads',
        action='store',
        default=5,
        type=int,
        help='Number of threads to run (default: 10)')
2829 2830 2831 2832 2833
    parser.add_argument(
        '-x',
        '--continue-on-exception',
        action='store_true',
        help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')
2834

2835
    global gConfig
2836
    gConfig = parser.parse_args()
2837

2838
    # Logging Stuff
2839
    global logger
S
Shuduo Sang 已提交
2840 2841
    _logger = logging.getLogger('CrashGen')  # real logger
    _logger.addFilter(LoggingFilter())
2842 2843 2844
    ch = logging.StreamHandler()
    _logger.addHandler(ch)

S
Shuduo Sang 已提交
2845 2846
    # Logging adapter, to be used as a logger
    logger = MyLoggingAdapter(_logger, [])
2847

S
Shuduo Sang 已提交
2848 2849
    if (gConfig.debug):
        logger.setLevel(logging.DEBUG)  # default seems to be INFO
S
Steven Li 已提交
2850 2851
    else:
        logger.setLevel(logging.INFO)
S
Shuduo Sang 已提交
2852

2853 2854
    Dice.seed(0)  # initial seeding of dice

2855
    # Run server or client
2856
    mExec = MainExec()
S
Shuduo Sang 已提交
2857
    if gConfig.run_tdengine:  # run server
2858
        mExec.runService()
S
Shuduo Sang 已提交
2859
    else:
2860
        return mExec.runClient()
2861

S
Shuduo Sang 已提交
2862

2863
if __name__ == "__main__":
S
Steven Li 已提交
2864 2865 2866
    exitCode = main()
    # print("Exiting with code: {}".format(exitCode))
    sys.exit(exitCode)