crash_gen.py 106.8 KB
Newer Older
S
Shuduo Sang 已提交
1
# -----!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
S
Shuduo Sang 已提交
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
# For type hinting before definition, ref:
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations
import taos
import crash_gen
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.log import *
from queue import Queue, Empty
from typing import IO
from typing import Set
from typing import Dict
from typing import List
from requests.auth import HTTPBasicAuth
import textwrap
import datetime
import logging
import time
import random
import threading
import requests
import copy
import argparse
import getopt
39

S
Steven Li 已提交
40
import sys
41
import os
42 43
import io
import signal
44
import traceback
45 46 47 48 49 50 51

try:
    import psutil
except:
    print("Psutil module needed, please install: sudo pip3 install psutil")
    sys.exit(-1)

52 53 54 55
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
56

S
Shuduo Sang 已提交
57
# Global variables, tried to keep a small number.
58 59 60

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
S
Shuduo Sang 已提交
61
gConfig = argparse.Namespace()  # Dummy value, will be replaced later
62
gSvcMgr = None # TODO: refactor this hack, use dep injection
63
logger = None
S
Steven Li 已提交
64

S
Shuduo Sang 已提交
65
def runThread(wt: WorkerThread):
66
    wt.run()
67

68 69
class CrashGenError(Exception):
    def __init__(self, msg=None, errno=None):
S
Shuduo Sang 已提交
70
        self.msg = msg
71
        self.errno = errno
S
Shuduo Sang 已提交
72

73 74 75
    def __str__(self):
        return self.msg

S
Shuduo Sang 已提交
76

S
Steven Li 已提交
77
class WorkerThread:
78
    def __init__(self, pool: ThreadPool, tid, tc: ThreadCoordinator,
S
Shuduo Sang 已提交
79 80 81
                 # te: TaskExecutor,
                 ):  # note: main thread context!
        # self._curStep = -1
82
        self._pool = pool
S
Shuduo Sang 已提交
83 84
        self._tid = tid
        self._tc = tc  # type: ThreadCoordinator
S
Steven Li 已提交
85
        # self.threadIdent = threading.get_ident()
86 87
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()
S
Steven Li 已提交
88

89
        # Let us have a DB connection of our own
S
Shuduo Sang 已提交
90
        if (gConfig.per_thread_db_connection):  # type: ignore
91
            # print("connector_type = {}".format(gConfig.connector_type))
92 93 94 95 96 97 98 99 100 101 102
            if gConfig.connector_type == 'native':
                self._dbConn = DbConn.createNative() 
            elif gConfig.connector_type == 'rest':
                self._dbConn = DbConn.createRest() 
            elif gConfig.connector_type == 'mixed':
                if Dice.throw(2) == 0: # 1/2 chance
                    self._dbConn = DbConn.createNative() 
                else:
                    self._dbConn = DbConn.createRest() 
            else:
                raise RuntimeError("Unexpected connector type: {}".format(gConfig.connector_type))
103

S
Shuduo Sang 已提交
104
        self._dbInUse = False  # if "use db" was executed already
105

106
    def logDebug(self, msg):
S
Steven Li 已提交
107
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
108 109

    def logInfo(self, msg):
S
Steven Li 已提交
110
        logger.info("    TRD[{}] {}".format(self._tid, msg))
111

112 113 114 115
    def dbInUse(self):
        return self._dbInUse

    def useDb(self):
S
Shuduo Sang 已提交
116
        if (not self._dbInUse):
117 118 119
            self.execSql("use db")
        self._dbInUse = True

120
    def getTaskExecutor(self):
S
Shuduo Sang 已提交
121
        return self._tc.getTaskExecutor()
122

S
Steven Li 已提交
123
    def start(self):
124
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
125

S
Shuduo Sang 已提交
126
    def run(self):
S
Steven Li 已提交
127
        # initialization after thread starts, in the thread context
128
        # self.isSleeping = False
129 130
        logger.info("Starting to run thread: {}".format(self._tid))

S
Shuduo Sang 已提交
131
        if (gConfig.per_thread_db_connection):  # type: ignore
132
            logger.debug("Worker thread openning database connection")
133
            self._dbConn.open()
S
Steven Li 已提交
134

S
Shuduo Sang 已提交
135 136
        self._doTaskLoop()

137
        # clean up
S
Shuduo Sang 已提交
138
        if (gConfig.per_thread_db_connection):  # type: ignore
139
            self._dbConn.close()
140

S
Shuduo Sang 已提交
141
    def _doTaskLoop(self):
142 143
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
S
Shuduo Sang 已提交
144 145
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
146 147 148 149 150 151
            try:
                tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
            except threading.BrokenBarrierError as err: # main thread timed out
                logger.debug("[TRD] Worker thread exiting due to main thread barrier time-out")
                break

152
            logger.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid))
153
            self.crossStepGate()   # then per-thread gate, after being tapped
154
            logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
155
            if not self._tc.isRunning():
156
                logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
157 158
                break

159
            # Fetch a task from the Thread Coordinator
160
            logger.debug( "[TRD] Worker thread [{}] about to fetch task".format(self._tid))
161
            task = tc.fetchTask()
162 163

            # Execute such a task
S
Shuduo Sang 已提交
164 165 166
            logger.debug(
                "[TRD] Worker thread [{}] about to execute task: {}".format(
                    self._tid, task.__class__.__name__))
167
            task.execute(self)
168
            tc.saveExecutedTask(task)
169
            logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))
S
Shuduo Sang 已提交
170 171

            self._dbInUse = False  # there may be changes between steps
172

S
Shuduo Sang 已提交
173 174
    def verifyThreadSelf(self):  # ensure we are called by this own thread
        if (threading.get_ident() != self._thread.ident):
S
Steven Li 已提交
175 176
            raise RuntimeError("Unexpectly called from other threads")

S
Shuduo Sang 已提交
177 178
    def verifyThreadMain(self):  # ensure we are called by the main thread
        if (threading.get_ident() != threading.main_thread().ident):
S
Steven Li 已提交
179 180 181
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
S
Shuduo Sang 已提交
182
        if (not self._thread.is_alive()):
S
Steven Li 已提交
183 184
            raise RuntimeError("Unexpected dead thread")

185
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
186 187
    def crossStepGate(self):
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
188 189
        self.verifyThreadSelf()  # only allowed by ourselves

190
        # Wait again at the "gate", waiting to be "tapped"
S
Shuduo Sang 已提交
191 192 193 194
        logger.debug(
            "[TRD] Worker thread {} about to cross the step gate".format(
                self._tid))
        self._stepGate.wait()
195
        self._stepGate.clear()
S
Shuduo Sang 已提交
196

197
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
198

S
Shuduo Sang 已提交
199
    def tapStepGate(self):  # give it a tap, release the thread waiting there
S
Steven Li 已提交
200
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
201 202
        self.verifyThreadMain()  # only allowed for main thread

S
Steven Li 已提交
203
        logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
S
Shuduo Sang 已提交
204 205
        self._stepGate.set()  # wake up!
        time.sleep(0)  # let the released thread run a bit
206

S
Shuduo Sang 已提交
207
    def execSql(self, sql):  # TODO: expose DbConn directly
208
        return self.getDbConn().execute(sql)
209

S
Shuduo Sang 已提交
210
    def querySql(self, sql):  # TODO: expose DbConn directly
211
        return self.getDbConn().query(sql)
212 213

    def getQueryResult(self):
214
        return self.getDbConn().getQueryResult()
215

216
    def getDbConn(self):
S
Shuduo Sang 已提交
217 218
        if (gConfig.per_thread_db_connection):
            return self._dbConn
219
        else:
220
            return self._tc.getDbManager().getDbConn()
221

222 223
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
S
Shuduo Sang 已提交
224
    #         return self._dbConn.query(sql)
225 226
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
227

228
# The coordinator of all worker threads, mostly running in main thread
S
Shuduo Sang 已提交
229 230


231
class ThreadCoordinator:
232 233
    WORKER_THREAD_TIMEOUT = 30

234
    def __init__(self, pool: ThreadPool, dbManager):
S
Shuduo Sang 已提交
235
        self._curStep = -1  # first step is 0
236
        self._pool = pool
237
        # self._wd = wd
S
Shuduo Sang 已提交
238
        self._te = None  # prepare for every new step
239
        self._dbManager = dbManager
S
Shuduo Sang 已提交
240 241
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things
S
Steven Li 已提交
242

S
Shuduo Sang 已提交
243 244
        self._stepBarrier = threading.Barrier(
            self._pool.numThreads + 1)  # one barrier for all threads
245
        self._execStats = ExecutionStats()
246
        self._runStatus = MainExec.STATUS_RUNNING
S
Steven Li 已提交
247

248 249 250
    def getTaskExecutor(self):
        return self._te

S
Shuduo Sang 已提交
251
    def getDbManager(self) -> DbManager:
252
        return self._dbManager
253

254 255
    def crossStepBarrier(self, timeout=None):
        self._stepBarrier.wait(timeout) 
256

257 258 259 260
    def requestToStop(self):
        self._runStatus = MainExec.STATUS_STOPPING
        self._execStats.registerFailure("User Interruption")

261
    def _runShouldEnd(self, transitionFailed, hasAbortedTask, workerTimeout):
262 263 264 265 266 267 268 269 270
        maxSteps = gConfig.max_steps  # type: ignore
        if self._curStep >= (maxSteps - 1): # maxStep==10, last curStep should be 9
            return True
        if self._runStatus != MainExec.STATUS_RUNNING:
            return True
        if transitionFailed:
            return True
        if hasAbortedTask:
            return True
271 272
        if workerTimeout:
            return True
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
        return False

    def _hasAbortedTask(self): # from execution of previous step
        for task in self._executedTasks:
            if task.isAborted():
                # print("Task aborted: {}".format(task))
                # hasAbortedTask = True
                return True
        return False

    def _releaseAllWorkerThreads(self, transitionFailed):
        self._curStep += 1  # we are about to get into next step. TODO: race condition here!
        # Now not all threads had time to go to sleep
        logger.debug(
            "--\r\n\n--> Step {} starts with main thread waking up".format(self._curStep))

        # A new TE for the new step
        self._te = None # set to empty first, to signal worker thread to stop
        if not transitionFailed:  # only if not failed
            self._te = TaskExecutor(self._curStep)

        logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(
                self._curStep))  # Now not all threads had time to go to sleep
        # Worker threads will wake up at this point, and each execute it's own task
        self.tapAllThreads() # release all worker thread from their "gate"

    def _syncAtBarrier(self):
         # Now main thread (that's us) is ready to enter a step
        # let other threads go past the pool barrier, but wait at the
        # thread gate
        logger.debug("[TRD] Main thread about to cross the barrier")
304
        self.crossStepBarrier(timeout=self.WORKER_THREAD_TIMEOUT)
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
        self._stepBarrier.reset()  # Other worker threads should now be at the "gate"
        logger.debug("[TRD] Main thread finished crossing the barrier")

    def _doTransition(self):
        transitionFailed = False
        try:
            sm = self._dbManager.getStateMachine()
            logger.debug("[STT] starting transitions")
            # at end of step, transiton the DB state
            sm.transition(self._executedTasks)
            logger.debug("[STT] transition ended")
            # Due to limitation (or maybe not) of the Python library,
            # we cannot share connections across threads
            if sm.hasDatabase():
                for t in self._pool.threadList:
                    logger.debug("[DB] use db for all worker threads")
                    t.useDb()
                    # t.execSql("use db") # main thread executing "use
                    # db" on behalf of every worker thread
        except taos.error.ProgrammingError as err:
            if (err.msg == 'network unavailable'):  # broken DB connection
                logger.info("DB connection broken, execution failed")
                traceback.print_stack()
                transitionFailed = True
                self._te = None  # Not running any more
                self._execStats.registerFailure("Broken DB Connection")
                # continue # don't do that, need to tap all threads at
                # end, and maybe signal them to stop
            else:
                raise

        self.resetExecutedTasks()  # clear the tasks after we are done
        # Get ready for next step
        logger.debug("<-- Step {} finished, trasition failed = {}".format(self._curStep, transitionFailed))
        return transitionFailed

S
Shuduo Sang 已提交
341
    def run(self):
342
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
343 344

        # Coordinate all threads step by step
S
Shuduo Sang 已提交
345
        self._curStep = -1  # not started yet
346
        
S
Shuduo Sang 已提交
347
        self._execStats.startExec()  # start the stop watch
348 349
        transitionFailed = False
        hasAbortedTask = False
350 351
        workerTimeout = False
        while not self._runShouldEnd(transitionFailed, hasAbortedTask, workerTimeout):
352
            if not gConfig.debug: # print this only if we are not in debug mode                
S
Shuduo Sang 已提交
353
                print(".", end="", flush=True)
354
                        
355 356 357 358 359 360 361 362 363 364
            try:
                self._syncAtBarrier() # For now just cross the barrier
            except threading.BrokenBarrierError as err:
                logger.info("Main loop aborted, caused by worker thread time-out")
                self._execStats.registerFailure("Aborted due to worker thread timeout")
                print("\n\nWorker Thread time-out detected, important thread info:")
                ts = ThreadStacks()
                ts.print(filterInternal=True)
                workerTimeout = True
                break
365 366

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
S
Shuduo Sang 已提交
367 368
            # We use this period to do house keeping work, when all worker
            # threads are QUIET.
369 370 371
            hasAbortedTask = self._hasAbortedTask() # from previous step
            if hasAbortedTask: 
                logger.info("Aborted task encountered, exiting test program")
372
                self._execStats.registerFailure("Aborted Task Encountered")
373
                break # do transition only if tasks are error free
S
Shuduo Sang 已提交
374

375 376 377 378
            # Ending previous step
            transitionFailed = self._doTransition() # To start, we end step -1 first
            # Then we move on to the next step
            self._releaseAllWorkerThreads(transitionFailed)                    
379

380 381
        if hasAbortedTask or transitionFailed : # abnormal ending, workers waiting at "gate"
            logger.debug("Abnormal ending of main thraed")
382 383
        elif workerTimeout:
            logger.debug("Abnormal ending of main thread, due to worker timeout")
384 385 386
        else: # regular ending, workers waiting at "barrier"
            logger.debug("Regular ending, main thread waiting for all worker threads to stop...")
            self._syncAtBarrier()
387

388 389 390
        self._te = None  # No more executor, time to end
        logger.debug("Main thread tapping all threads one last time...")
        self.tapAllThreads()  # Let the threads run one last time
391

392
        logger.debug("\r\n\n--> Main thread ready to finish up...")
393
        logger.debug("Main thread joining all threads")
S
Shuduo Sang 已提交
394
        self._pool.joinAll()  # Get all threads to finish
395
        logger.info("\nAll worker threads finished")
396 397
        self._execStats.endExec()

398 399
    def printStats(self):
        self._execStats.printStats()
S
Steven Li 已提交
400

S
Steven Li 已提交
401 402 403 404 405 406
    def isFailed(self):
        return self._execStats.isFailed()

    def getExecStats(self):
        return self._execStats

S
Shuduo Sang 已提交
407
    def tapAllThreads(self):  # in a deterministic manner
S
Steven Li 已提交
408
        wakeSeq = []
S
Shuduo Sang 已提交
409 410
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
S
Steven Li 已提交
411 412 413
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
S
Shuduo Sang 已提交
414 415 416
        logger.debug(
            "[TRD] Main thread waking up worker threads: {}".format(
                str(wakeSeq)))
417
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
418
        for i in wakeSeq:
S
Shuduo Sang 已提交
419 420 421
            # TODO: maybe a bit too deep?!
            self._pool.threadList[i].tapStepGate()
            time.sleep(0)  # yield
S
Steven Li 已提交
422

423
    def isRunning(self):
S
Shuduo Sang 已提交
424
        return self._te is not None
425

S
Shuduo Sang 已提交
426 427
    def fetchTask(self) -> Task:
        if (not self.isRunning()):  # no task
428
            raise RuntimeError("Cannot fetch task when not running")
429 430
        # return self._wd.pickTask()
        # Alternatively, let's ask the DbState for the appropriate task
431 432 433 434 435 436 437
        # dbState = self.getDbState()
        # tasks = dbState.getTasksAtState() # TODO: create every time?
        # nTasks = len(tasks)
        # i = Dice.throw(nTasks)
        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
        # return tasks[i].clone() # TODO: still necessary?
S
Shuduo Sang 已提交
438 439 440 441 442
        # pick a task type for current state
        taskType = self.getDbManager().getStateMachine().pickTaskType()
        return taskType(
            self.getDbManager(),
            self._execStats)  # create a task from it
443 444

    def resetExecutedTasks(self):
S
Shuduo Sang 已提交
445
        self._executedTasks = []  # should be under single thread
446 447 448 449

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
450 451

# We define a class to run a number of threads in locking steps.
S
Shuduo Sang 已提交
452 453


454
class ThreadPool:
455
    def __init__(self, numThreads, maxSteps):
456 457 458 459
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
S
Shuduo Sang 已提交
460 461
        self.threadList = []  # type: List[WorkerThread]

462
    # starting to run all the threads, in locking steps
463
    def createAndStartThreads(self, tc: ThreadCoordinator):
S
Shuduo Sang 已提交
464 465
        for tid in range(0, self.numThreads):  # Create the threads
            workerThread = WorkerThread(self, tid, tc)
466
            self.threadList.append(workerThread)
S
Shuduo Sang 已提交
467
            workerThread.start()  # start, but should block immediately before step 0
468 469 470 471 472 473

    def joinAll(self):
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

474 475
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Shuduo Sang 已提交
476 477


S
Steven Li 已提交
478 479
class LinearQueue():
    def __init__(self):
480
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
481
        self.lastIndex = 0
S
Shuduo Sang 已提交
482 483
        self._lock = threading.RLock()  # our functions may call each other
        self.inUse = set()  # the indexes that are in use right now
S
Steven Li 已提交
484

485
    def toText(self):
S
Shuduo Sang 已提交
486 487
        return "[{}..{}], in use: {}".format(
            self.firstIndex, self.lastIndex, self.inUse)
488 489

    # Push (add new element, largest) to the tail, and mark it in use
S
Shuduo Sang 已提交
490
    def push(self):
491
        with self._lock:
S
Shuduo Sang 已提交
492 493
            # if ( self.isEmpty() ):
            #     self.lastIndex = self.firstIndex
494
            #     return self.firstIndex
495 496
            # Otherwise we have something
            self.lastIndex += 1
497 498
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
499
            return self.lastIndex
S
Steven Li 已提交
500 501

    def pop(self):
502
        with self._lock:
S
Shuduo Sang 已提交
503 504 505 506
            if (self.isEmpty()):
                # raise RuntimeError("Cannot pop an empty queue")
                return False  # TODO: None?

507
            index = self.firstIndex
S
Shuduo Sang 已提交
508
            if (index in self.inUse):
509 510
                return False

511 512 513 514 515 516 517
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
518
        with self._lock:
519 520 521 522
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
523
    def allocate(self, i):
524
        with self._lock:
525
            # logger.debug("LQ allocating item {}".format(i))
S
Shuduo Sang 已提交
526 527 528
            if (i in self.inUse):
                raise RuntimeError(
                    "Cannot re-use same index in queue: {}".format(i))
529 530
            self.inUse.add(i)

S
Steven Li 已提交
531
    def release(self, i):
532
        with self._lock:
533
            # logger.debug("LQ releasing item {}".format(i))
S
Shuduo Sang 已提交
534
            self.inUse.remove(i)  # KeyError possible, TODO: why?
535 536 537 538

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
539
    def pickAndAllocate(self):
S
Shuduo Sang 已提交
540
        if (self.isEmpty()):
541 542
            return None
        with self._lock:
S
Shuduo Sang 已提交
543
            cnt = 0  # counting the interations
544 545
            while True:
                cnt += 1
S
Shuduo Sang 已提交
546
                if (cnt > self.size() * 10):  # 10x iteration already
547 548
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
S
Shuduo Sang 已提交
549 550
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if (ret not in self.inUse):
551 552 553
                    self.allocate(ret)
                    return ret

S
Shuduo Sang 已提交
554

555
class DbConn:
556
    TYPE_NATIVE = "native-c"
557
    TYPE_REST =   "rest-api"
558 559 560 561 562 563 564 565 566
    TYPE_INVALID = "invalid"

    @classmethod
    def create(cls, connType):
        if connType == cls.TYPE_NATIVE:
            return DbConnNative()
        elif connType == cls.TYPE_REST:
            return DbConnRest()
        else:
S
Shuduo Sang 已提交
567 568
            raise RuntimeError(
                "Unexpected connection type: {}".format(connType))
569 570 571 572 573 574 575 576 577

    @classmethod
    def createNative(cls):
        return cls.create(cls.TYPE_NATIVE)

    @classmethod
    def createRest(cls):
        return cls.create(cls.TYPE_REST)

578 579
    def __init__(self):
        self.isOpen = False
580
        self._type = self.TYPE_INVALID
581 582 583 584
        self._lastSql = None

    def getLastSql(self):
        return self._lastSql
585 586

    def open(self):
S
Shuduo Sang 已提交
587
        if (self.isOpen):
588 589
            raise RuntimeError("Cannot re-open an existing DB connection")

590 591
        # below implemented by child classes
        self.openByType()
592

593
        logger.debug("[DB] data connection opened, type = {}".format(self._type))
594 595
        self.isOpen = True

S
Shuduo Sang 已提交
596 597 598 599
    def resetDb(self):  # reset the whole database, etc.
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot reset database until connection is open")
600 601
        # self._tdSql.prepare() # Recreate database, etc.

602
        self.execute('drop database if exists db')
603 604
        logger.debug("Resetting DB, dropped database")
        # self._cursor.execute('create database db')
605
        # self._cursor.execute('use db')
606 607
        # tdSql.execute('show databases')

S
Shuduo Sang 已提交
608
    def queryScalar(self, sql) -> int:
609 610
        return self._queryAny(sql)

S
Shuduo Sang 已提交
611
    def queryString(self, sql) -> str:
612 613
        return self._queryAny(sql)

S
Shuduo Sang 已提交
614 615
    def _queryAny(self, sql):  # actual query result as an int
        if (not self.isOpen):
616
            raise RuntimeError("Cannot query database until connection is open")
617
        nRows = self.query(sql)
S
Shuduo Sang 已提交
618
        if nRows != 1:
619
            raise RuntimeError("Unexpected result for query: {}, rows = {}".format(sql, nRows))
620
        if self.getResultRows() != 1 or self.getResultCols() != 1:
621
            raise RuntimeError("Unexpected result set for query: {}".format(sql))
622 623
        return self.getQueryResult()[0][0]

624 625 626 627 628 629 630 631 632
    def use(self, dbName):
        self.execute("use {}".format(dbName))

    def hasDatabases(self):
        return self.query("show databases") > 0

    def hasTables(self):
        return self.query("show tables") > 0

633 634
    def execute(self, sql):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
635

636 637 638
    def query(self, sql) -> int: # return num rows returned
        raise RuntimeError("Unexpected execution, should be overriden")

639 640
    def openByType(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
641

642 643
    def getQueryResult(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
644

645 646
    def getResultRows(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
647

648 649 650 651
    def getResultCols(self):
        raise RuntimeError("Unexpected execution, should be overriden")

# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql
S
Shuduo Sang 已提交
652 653


654 655 656 657
class DbConnRest(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
658
        self._url = "http://localhost:6020/rest/sql"  # fixed for now
659 660
        self._result = None

S
Shuduo Sang 已提交
661 662 663
    def openByType(self):  # Open connection
        pass  # do nothing, always open

664
    def close(self):
S
Shuduo Sang 已提交
665 666 667
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
668 669 670 671 672
        # Do nothing for REST
        logger.debug("[DB] REST Database connection closed")
        self.isOpen = False

    def _doSql(self, sql):
673
        self._lastSql = sql # remember this, last SQL attempted
674 675 676
        try:
            r = requests.post(self._url, 
                data = sql,
677
                auth = HTTPBasicAuth('root', 'taosdata'))         
678 679 680
        except:
            print("REST API Failure (TODO: more info here)")
            raise
681 682
        rj = r.json()
        # Sanity check for the "Json Result"
S
Shuduo Sang 已提交
683
        if ('status' not in rj):
684 685
            raise RuntimeError("No status in REST response")

S
Shuduo Sang 已提交
686 687 688 689
        if rj['status'] == 'error':  # clearly reported error
            if ('code' not in rj):  # error without code
                raise RuntimeError("REST error return without code")
            errno = rj['code']  # May need to massage this in the future
690
            # print("Raising programming error with REST return: {}".format(rj))
S
Shuduo Sang 已提交
691 692
            raise taos.error.ProgrammingError(
                rj['desc'], errno)  # todo: check existance of 'desc'
693

S
Shuduo Sang 已提交
694 695 696 697
        if rj['status'] != 'succ':  # better be this
            raise RuntimeError(
                "Unexpected REST return status: {}".format(
                    rj['status']))
698 699

        nRows = rj['rows'] if ('rows' in rj) else 0
S
Shuduo Sang 已提交
700
        self._result = rj
701 702
        return nRows

S
Shuduo Sang 已提交
703 704 705 706
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
707 708
        logger.debug("[SQL-REST] Executing SQL: {}".format(sql))
        nRows = self._doSql(sql)
S
Shuduo Sang 已提交
709 710
        logger.debug(
            "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
711 712
        return nRows

S
Shuduo Sang 已提交
713
    def query(self, sql):  # return rows affected
714 715 716 717 718 719 720 721 722 723 724 725 726
        return self.execute(sql)

    def getQueryResult(self):
        return self._result['data']

    def getResultRows(self):
        print(self._result)
        raise RuntimeError("TBD")
        # return self._tdSql.queryRows

    def getResultCols(self):
        print(self._result)
        raise RuntimeError("TBD")
S
Shuduo Sang 已提交
727

728
    # Duplicate code from TDMySQL, TODO: merge all this into DbConnNative
729 730


731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758
class MyTDSql:
    def __init__(self):
        self.queryRows = 0
        self.queryCols = 0
        self.affectedRows = 0

    def init(self, cursor, log=True):
        self.cursor = cursor
        # if (log):
        #     caller = inspect.getframeinfo(inspect.stack()[1][0])
        #     self.cursor.log(caller.filename + ".sql")

    def close(self):
        self.cursor.close()

    def query(self, sql):
        self.sql = sql
        try:
            self.cursor.execute(sql)
            self.queryResult = self.cursor.fetchall()
            self.queryRows = len(self.queryResult)
            self.queryCols = len(self.cursor.description)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.queryRows
759

760 761 762 763 764 765 766 767 768 769 770
    def execute(self, sql):
        self.sql = sql
        try:
            self.affectedRows = self.cursor.execute(sql)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.affectedRows

S
Shuduo Sang 已提交
771

772
class DbConnNative(DbConn):
773 774 775 776
    # Class variables
    _lock = threading.Lock()
    _connInfoDisplayed = False

777 778
    def __init__(self):
        super().__init__()
779
        self._type = self.TYPE_NATIVE
S
Shuduo Sang 已提交
780
        self._conn = None
781
        self._cursor = None
782
        
S
Shuduo Sang 已提交
783

784 785 786 787 788 789 790 791 792 793 794
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
S
Shuduo Sang 已提交
795
                    buildPath = root[:len(root) - len("/build/bin")]
796 797 798
                    break
        return buildPath

799
    
S
Shuduo Sang 已提交
800
    def openByType(self):  # Open connection
801
        cfgPath = self.getBuildPath() + "/test/cfg"
802
        hostAddr = "127.0.0.1"
803

804 805 806 807 808
        with self._lock: # force single threading for opening DB connections
            if not self._connInfoDisplayed:
                self.__class__._connInfoDisplayed = True # updating CLASS variable
                logger.info("Initiating TAOS native connection to {}, using config at {}".format(hostAddr, cfgPath))
            
809
            self._conn = taos.connect(host=hostAddr, config=cfgPath)  # TODO: make configurable
810 811
            self._cursor = self._conn.cursor()
        
812
        self._cursor.execute('reset query cache')
S
Shuduo Sang 已提交
813
        # self._cursor.execute('use db') # do this at the beginning of every
814 815

        # Open connection
816
        self._tdSql = MyTDSql()
817
        self._tdSql.init(self._cursor)
S
Shuduo Sang 已提交
818

819
    def close(self):
S
Shuduo Sang 已提交
820 821 822
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
823
        self._tdSql.close()
824
        logger.debug("[DB] Database connection closed")
825
        self.isOpen = False
S
Steven Li 已提交
826

S
Shuduo Sang 已提交
827 828 829 830
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
831 832
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
S
Shuduo Sang 已提交
833 834 835
        logger.debug(
            "[SQL] Execution Result, nRows = {}, SQL = {}".format(
                nRows, sql))
836
        return nRows
S
Steven Li 已提交
837

S
Shuduo Sang 已提交
838 839 840 841
    def query(self, sql):  # return rows affected
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
842 843
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
S
Shuduo Sang 已提交
844 845 846
        logger.debug(
            "[SQL] Query Result, nRows = {}, SQL = {}".format(
                nRows, sql))
847
        return nRows
848
        # results are in: return self._tdSql.queryResult
849

850 851 852
    def getQueryResult(self):
        return self._tdSql.queryResult

853 854
    def getResultRows(self):
        return self._tdSql.queryRows
855

856 857
    def getResultCols(self):
        return self._tdSql.queryCols
858

S
Shuduo Sang 已提交
859

860
class AnyState:
S
Shuduo Sang 已提交
861 862 863
    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, no even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
864
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
S
Shuduo Sang 已提交
865
    STATE_HAS_DATA = 3  # we have some data in the table
866 867 868 869 870
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
871 872
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
873 874 875 876 877 878 879
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Shuduo Sang 已提交
880 881
        # -1 hack to accomodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]
882 883 884 885

    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
886 887 888 889 890 891
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
S
Shuduo Sang 已提交
892 893 894
            raise RuntimeError(
                "Unexpected comparison, type = {}".format(
                    type(other)))
S
Steven Li 已提交
895

896 897 898
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
899 900 901
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

902 903
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
S
Shuduo Sang 已提交
904

905 906
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
S
Shuduo Sang 已提交
907

908 909
    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]
S
Shuduo Sang 已提交
910

911 912
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
913

914 915
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
916

917 918
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
S
Shuduo Sang 已提交
919

920 921 922 923 924
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
S
Shuduo Sang 已提交
925
        for task in tasks:
926 927 928
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
929
                # task.logDebug("Task success found")
930
                sCnt += 1
S
Shuduo Sang 已提交
931 932 933
                if (sCnt >= 2):
                    raise RuntimeError(
                        "Unexpected more than 1 success with task: {}".format(cls))
934 935 936 937

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
S
Shuduo Sang 已提交
938
        for task in tasks:
939 940
            if not isinstance(task, cls):
                continue
S
Shuduo Sang 已提交
941
            exists = True  # we have a valid instance
942 943
            if task.isSuccess():
                sCnt += 1
S
Shuduo Sang 已提交
944 945 946
        if (exists and sCnt <= 0):
            raise RuntimeError(
                "Unexpected zero success for task: {}".format(cls))
947 948

    def assertNoTask(self, tasks, cls):
S
Shuduo Sang 已提交
949
        for task in tasks:
950
            if isinstance(task, cls):
S
Shuduo Sang 已提交
951 952
                raise CrashGenError(
                    "This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))
953 954

    def assertNoSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
955
        for task in tasks:
956 957
            if isinstance(task, cls):
                if task.isSuccess():
S
Shuduo Sang 已提交
958 959
                    raise RuntimeError(
                        "Unexpected successful task: {}".format(cls))
960 961

    def hasSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
962
        for task in tasks:
963 964 965 966 967 968
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
969
    def hasTask(self, tasks, cls):
S
Shuduo Sang 已提交
970
        for task in tasks:
S
Steven Li 已提交
971 972 973 974
            if isinstance(task, cls):
                return True
        return False

S
Shuduo Sang 已提交
975

976 977 978 979
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
S
Shuduo Sang 已提交
980 981 982
            False, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
983 984 985 986
        ]

    # def verifyTasksToState(self, tasks, newState):

S
Shuduo Sang 已提交
987

988 989 990 991
class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
S
Shuduo Sang 已提交
992 993 994
            True, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
995 996
        ]

S
Shuduo Sang 已提交
997 998
    def verifyTasksToState(self, tasks, newState):
        if (self.hasSuccess(tasks, TaskCreateDb)
999
                ):  # at EMPTY, if there's succes in creating DB
S
Shuduo Sang 已提交
1000 1001 1002 1003
            if (not self.hasTask(tasks, TaskDropDb)):  # and no drop_db tasks
                # we must have at most one. TODO: compare numbers
                self.assertAtMostOneSuccess(tasks, TaskCreateDb)

1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1015 1016 1017
        if (not self.hasTask(tasks, TaskCreateDb)):
            # only if we don't create any more
            self.assertAtMostOneSuccess(tasks, TaskDropDb)
1018
        self.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Steven Li 已提交
1019
        # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not true in massively parrallel cases
1020
        # Nothing to be said about adding data task
1021
        # if ( self.hasSuccess(tasks, DropDbTask) ): # dropped the DB
S
Shuduo Sang 已提交
1022 1023 1024
        # self.assertHasTask(tasks, DropDbTask) # implied by hasSuccess
        # self.assertAtMostOneSuccess(tasks, DropDbTask)
        # self._state = self.STATE_EMPTY
1025 1026
        # if ( self.hasSuccess(tasks, TaskCreateSuperTable) ): # did not drop db, create table success
        #     # self.assertHasTask(tasks, CreateFixedTableTask) # tried to create table
S
Shuduo Sang 已提交
1027
        #     if ( not self.hasTask(tasks, TaskDropSuperTable) ):
1028
        #         self.assertAtMostOneSuccess(tasks, TaskCreateSuperTable) # at most 1 attempt is successful, if we don't drop anything
S
Shuduo Sang 已提交
1029 1030 1031 1032 1033 1034
        # self.assertNoTask(tasks, DropDbTask) # should have have tried
        # if ( not self.hasSuccess(tasks, AddFixedDataTask) ): # just created table, no data yet
        #     # can't say there's add-data attempts, since they may all fail
        #     self._state = self.STATE_TABLE_ONLY
        # else:
        #     self._state = self.STATE_HAS_DATA
1035 1036 1037 1038
        # What about AddFixedData?
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ):
        #     self._state = self.STATE_HAS_DATA
        # else: # no success in dropping db tasks, no success in create fixed table? read data should also fail
S
Shuduo Sang 已提交
1039
        #     # raise RuntimeError("Unexpected no-success scenario")   # We might just landed all failure tasks,
1040 1041
        #     self._state = self.STATE_DB_ONLY  # no change

S
Shuduo Sang 已提交
1042

1043
class StateSuperTableOnly(AnyState):
1044 1045 1046 1047 1048 1049 1050 1051 1052
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1053
        if (self.hasSuccess(tasks, TaskDropSuperTable)
1054
                ):  # we are able to drop the table
1055
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
1056 1057
            # we must have had recreted it
            self.hasSuccess(tasks, TaskCreateSuperTable)
1058

1059
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
1060 1061
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
1062
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
1063 1064 1065
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
1066
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
1067 1068 1069
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
1070

S
Shuduo Sang 已提交
1071

1072 1073 1074 1075 1076 1077 1078 1079 1080 1081
class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1082
        if (newState.equals(AnyState.STATE_EMPTY)):
1083
            self.hasSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1084 1085 1086 1087
            if (not self.hasTask(tasks, TaskCreateDb)):
                self.assertAtMostOneSuccess(tasks, TaskDropDb)  # TODO: dicy
        elif (newState.equals(AnyState.STATE_DB_ONLY)):  # in DB only
            if (not self.hasTask(tasks, TaskCreateDb)
1088
                ):  # without a create_db task
S
Shuduo Sang 已提交
1089 1090
                # we must have drop_db task
                self.assertNoTask(tasks, TaskDropDb)
1091
            self.hasSuccess(tasks, TaskDropSuperTable)
1092
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
1093 1094 1095 1096
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
1097
            # self.hasSuccess(tasks, DeleteDataTasks)
S
Shuduo Sang 已提交
1098 1099
        else:  # should be STATE_HAS_DATA
            if (not self.hasTask(tasks, TaskCreateDb)
1100
                ):  # only if we didn't create one
S
Shuduo Sang 已提交
1101 1102 1103
                # we shouldn't have dropped it
                self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)
1104
                    ):  # if we didn't create the table
S
Shuduo Sang 已提交
1105 1106
                # we should not have a task that drops it
                self.assertNoTask(tasks, TaskDropSuperTable)
1107
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
1108

S
Shuduo Sang 已提交
1109

1110
class StateMechine:
1111 1112
    def __init__(self, dbConn):
        self._dbConn = dbConn
S
Shuduo Sang 已提交
1113 1114 1115
        self._curState = self._findCurrentState()  # starting state
        # transitition target probabilities, indexed with value of STATE_EMPTY,
        # STATE_DB_ONLY, etc.
1116
        self._stateWeights = [1, 2, 10, 40]
S
Shuduo Sang 已提交
1117

1118 1119 1120
    def getCurrentState(self):
        return self._curState

1121 1122 1123
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

1124
    # May be slow, use cautionsly...
S
Shuduo Sang 已提交
1125
    def getTaskTypes(self):  # those that can run (directly/indirectly) from the current state
1126 1127 1128 1129 1130 1131
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

S
Shuduo Sang 已提交
1132
        allTaskClasses = StateTransitionTask.__subclasses__()  # all state transition tasks
1133 1134
        firstTaskTypes = []
        for tc in allTaskClasses:
S
Shuduo Sang 已提交
1135
            # t = tc(self) # create task object
1136 1137
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
S
Shuduo Sang 已提交
1138 1139 1140 1141 1142 1143 1144 1145
        # now we have all the tasks that can begin directly from the current
        # state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy()  # have to have these
        for task1 in firstTaskTypes:  # each task type gathered so far
            endState = task1.getEndState()  # figure the end state
            if endState is None:  # does not change end state
                continue  # no use, do nothing
            for tc in allTaskClasses:  # what task can further begin from there?
1146
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
S
Shuduo Sang 已提交
1147
                    taskTypes.append(tc)  # gather it
1148 1149

        if len(taskTypes) <= 0:
S
Shuduo Sang 已提交
1150 1151 1152 1153 1154 1155 1156
            raise RuntimeError(
                "No suitable task types found for state: {}".format(
                    self._curState))
        logger.debug(
            "[OPS] Tasks found for state {}: {}".format(
                self._curState,
                typesToStrings(taskTypes)))
1157 1158 1159 1160
        return taskTypes

    def _findCurrentState(self):
        dbc = self._dbConn
S
Shuduo Sang 已提交
1161
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
1162 1163
        if not dbc.hasDatabases():  # no database?!
            logger.debug( "[STT] empty database found, between {} and {}".format(ts, time.time()))
1164
            return StateEmpty()
S
Shuduo Sang 已提交
1165 1166
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
1167 1168 1169
        dbc.use("db")
        if not dbc.hasTables():  # no tables
            logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
1170
            return StateDbOnly()
1171 1172 1173 1174

        sTable = DbManager.getFixedSuperTable()
        if sTable.hasRegTables(dbc):  # no regular tables
            logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
1175
            return StateSuperTableOnly()
S
Shuduo Sang 已提交
1176
        else:  # has actual tables
1177
            logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
1178 1179 1180
            return StateHasData()

    def transition(self, tasks):
S
Shuduo Sang 已提交
1181
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
1182
            logger.debug("[STT] Starting State: {}".format(self._curState))
S
Shuduo Sang 已提交
1183
            return  # do nothing
1184

S
Shuduo Sang 已提交
1185 1186
        # this should show up in the server log, separating steps
        self._dbConn.execute("show dnodes")
1187 1188 1189 1190

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
1191 1192
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
            # case of multiple creation and drops
1193 1194 1195

        if self._curState.canDropDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1196 1197
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
            # case of drop-create-drop
1198 1199 1200

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
S
Shuduo Sang 已提交
1201 1202
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not
            # really, in case of create-drop-create
1203 1204 1205

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
S
Shuduo Sang 已提交
1206 1207
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not
            # really in case of drop-create-drop
1208 1209

        # if self._state.canAddData():
S
Shuduo Sang 已提交
1210 1211
        # self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true
        # actually
1212 1213 1214 1215 1216 1217

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
S
Shuduo Sang 已提交
1218 1219
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
1220 1221 1222
        self._curState = newState

    def pickTaskType(self):
S
Shuduo Sang 已提交
1223 1224
        # all the task types we can choose from at curent state
        taskTypes = self.getTaskTypes()
1225 1226 1227
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
S
Shuduo Sang 已提交
1228 1229 1230
            if endState is not None:
                # TODO: change to a method
                weights.append(self._stateWeights[endState.getValIndex()])
1231
            else:
S
Shuduo Sang 已提交
1232 1233
                # read data task, default to 10: TODO: change to a constant
                weights.append(10)
1234
        i = self._weighted_choice_sub(weights)
S
Shuduo Sang 已提交
1235
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
1236 1237
        return taskTypes[i]

S
Shuduo Sang 已提交
1238 1239 1240 1241 1242
    # ref:
    # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
    def _weighted_choice_sub(self, weights):
        # TODO: use our dice to ensure it being determinstic?
        rnd = random.random() * sum(weights)
1243 1244 1245 1246
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
1247

1248
# Manager of the Database Data/Connection
S
Shuduo Sang 已提交
1249 1250 1251 1252


class DbManager():
    def __init__(self, resetDb=True):
S
Steven Li 已提交
1253
        self.tableNumQueue = LinearQueue()
S
Shuduo Sang 已提交
1254 1255 1256
        # datetime.datetime(2019, 1, 1) # initial date time tick
        self._lastTick = self.setupLastTick()
        self._lastInt = 0  # next one is initial integer
1257
        self._lock = threading.RLock()
S
Shuduo Sang 已提交
1258

1259
        # self.openDbServerConnection()
S
Shuduo Sang 已提交
1260 1261
        self._dbConn = DbConn.createNative() if (
            gConfig.connector_type == 'native') else DbConn.createRest()
1262
        try:
S
Shuduo Sang 已提交
1263
            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
1264 1265
        except taos.error.ProgrammingError as err:
            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
S
Shuduo Sang 已提交
1266 1267 1268
            if (err.msg == 'client disconnected'):  # cannot open DB connection
                print(
                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
S
Steven Li 已提交
1269
                sys.exit(2)
1270
            else:
S
Shuduo Sang 已提交
1271 1272
                raise
        except BaseException:
S
Steven Li 已提交
1273
            print("[=] Unexpected exception")
S
Shuduo Sang 已提交
1274 1275 1276 1277
            raise

        if resetDb:
            self._dbConn.resetDb()  # drop and recreate DB
1278

S
Shuduo Sang 已提交
1279 1280
        # Do this after dbConn is in proper shape
        self._stateMachine = StateMechine(self._dbConn)
1281

1282 1283 1284
    def getDbConn(self):
        return self._dbConn

S
Shuduo Sang 已提交
1285
    def getStateMachine(self) -> StateMechine:
1286 1287 1288 1289
        return self._stateMachine

    # def getState(self):
    #     return self._stateMachine.getCurrentState()
1290 1291 1292 1293 1294 1295

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
1296
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
1297
    def setupLastTick(self):
1298
        t1 = datetime.datetime(2020, 6, 1)
1299
        t2 = datetime.datetime.now()
S
Shuduo Sang 已提交
1300 1301 1302 1303
        # maybe a very large number, takes 69 years to exceed Python int range
        elSec = int(t2.timestamp() - t1.timestamp())
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * \
            500  # a number representing seconds within 10 years
1304
        # print("elSec = {}".format(elSec))
S
Shuduo Sang 已提交
1305 1306 1307
        t3 = datetime.datetime(2012, 1, 1)  # default "keep" is 10 years
        t4 = datetime.datetime.fromtimestamp(
            t3.timestamp() + elSec2)  # see explanation above
1308 1309 1310
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

S
Shuduo Sang 已提交
1311
    def pickAndAllocateTable(self):  # pick any table, and "use" it
S
Steven Li 已提交
1312 1313
        return self.tableNumQueue.pickAndAllocate()

1314 1315 1316 1317 1318
    def addTable(self):
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

1319 1320
    @classmethod
    def getFixedSuperTableName(cls):
1321
        return "fs_table"
1322

1323 1324 1325 1326
    @classmethod
    def getFixedSuperTable(cls):
        return TdSuperTable(cls.getFixedSuperTableName())

S
Shuduo Sang 已提交
1327
    def releaseTable(self, i):  # return the table back, so others can use it
S
Steven Li 已提交
1328 1329
        self.tableNumQueue.release(i)

1330
    def getNextTick(self):
S
Shuduo Sang 已提交
1331
        with self._lock:  # prevent duplicate tick
1332 1333
            if Dice.throw(20) == 0:  # 1 in 20 chance
                return self._lastTick + datetime.timedelta(0, -100) # Go back in time 100 seconds
S
Shuduo Sang 已提交
1334 1335 1336
            else:  # regular
                # add one second to it
                self._lastTick += datetime.timedelta(0, 1)
S
Steven Li 已提交
1337
                return self._lastTick
1338 1339

    def getNextInt(self):
1340 1341 1342
        with self._lock:
            self._lastInt += 1
            return self._lastInt
1343 1344

    def getNextBinary(self):
S
Shuduo Sang 已提交
1345 1346
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(
            self.getNextInt())
1347 1348

    def getNextFloat(self):
1349 1350 1351
        ret = 0.9 + self.getNextInt()
        # print("Float obtained: {}".format(ret))
        return ret
S
Shuduo Sang 已提交
1352

S
Steven Li 已提交
1353
    def getTableNameToDelete(self):
S
Shuduo Sang 已提交
1354 1355
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if (not tblNum):  # maybe false
1356
            return False
S
Shuduo Sang 已提交
1357

S
Steven Li 已提交
1358 1359
        return "table_{}".format(tblNum)

1360
    def cleanUp(self):
S
Shuduo Sang 已提交
1361 1362
        self._dbConn.close()

1363

1364
class TaskExecutor():
1365
    class BoundedList:
S
Shuduo Sang 已提交
1366
        def __init__(self, size=10):
1367 1368 1369
            self._size = size
            self._list = []

S
Shuduo Sang 已提交
1370 1371
        def add(self, n: int):
            if not self._list:  # empty
1372 1373 1374 1375 1376 1377 1378
                self._list.append(n)
                return
            # now we should insert
            nItems = len(self._list)
            insPos = 0
            for i in range(nItems):
                insPos = i
S
Shuduo Sang 已提交
1379 1380 1381
                if n <= self._list[i]:  # smaller than this item, time to insert
                    break  # found the insertion point
                insPos += 1  # insert to the right
1382

S
Shuduo Sang 已提交
1383 1384
            if insPos == 0:  # except for the 1st item, # TODO: elimiate first item as gating item
                return  # do nothing
1385 1386

            # print("Inserting at postion {}, value: {}".format(insPos, n))
S
Shuduo Sang 已提交
1387 1388
            self._list.insert(insPos, n)  # insert

1389
            newLen = len(self._list)
S
Shuduo Sang 已提交
1390 1391 1392 1393 1394
            if newLen <= self._size:
                return  # do nothing
            elif newLen == (self._size + 1):
                del self._list[0]  # remove the first item
            else:
1395 1396 1397 1398 1399 1400 1401
                raise RuntimeError("Corrupt Bounded List")

        def __str__(self):
            return repr(self._list)

    _boundedList = BoundedList()

1402 1403 1404
    def __init__(self, curStep):
        self._curStep = curStep

1405 1406 1407 1408
    @classmethod
    def getBoundedList(cls):
        return cls._boundedList

1409 1410 1411
    def getCurStep(self):
        return self._curStep

S
Shuduo Sang 已提交
1412
    def execute(self, task: Task, wt: WorkerThread):  # execute a task on a thread
1413
        task.execute(wt)
1414

1415 1416 1417 1418
    def recordDataMark(self, n: int):
        # print("[{}]".format(n), end="", flush=True)
        self._boundedList.add(n)

1419 1420
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
1421

1422 1423
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
1424

S
Shuduo Sang 已提交
1425

S
Steven Li 已提交
1426
class Task():
1427 1428 1429 1430
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Shuduo Sang 已提交
1431
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
S
Steven Li 已提交
1432 1433
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn
1434

S
Shuduo Sang 已提交
1435
    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
1436
        self._dbManager = dbManager
S
Shuduo Sang 已提交
1437
        self._workerThread = None
1438
        self._err = None
1439
        self._aborted = False
1440
        self._curStep = None
S
Shuduo Sang 已提交
1441
        self._numRows = None  # Number of rows affected
1442

S
Shuduo Sang 已提交
1443
        # Assign an incremental task serial number
1444
        self._taskNum = self.allocTaskNum()
S
Steven Li 已提交
1445
        # logger.debug("Creating new task {}...".format(self._taskNum))
1446

1447 1448
        self._execStats = execStats

1449
    def isSuccess(self):
S
Shuduo Sang 已提交
1450
        return self._err is None
1451

1452 1453 1454
    def isAborted(self):
        return self._aborted

S
Shuduo Sang 已提交
1455
    def clone(self):  # TODO: why do we need this again?
1456
        newTask = self.__class__(self._dbManager, self._execStats)
1457 1458 1459
        return newTask

    def logDebug(self, msg):
S
Shuduo Sang 已提交
1460 1461 1462
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1463 1464

    def logInfo(self, msg):
S
Shuduo Sang 已提交
1465 1466 1467
        self._workerThread.logInfo(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1468

1469
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1470 1471 1472
        raise RuntimeError(
            "To be implemeted by child classes, class name: {}".format(
                self.__class__.__name__))
1473

1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490
    def _isErrAcceptable(self, errno, msg):
        if errno in [
                0x05,  # TSDB_CODE_RPC_NOT_READY
                # 0x200, # invalid SQL, TODO: re-examine with TD-934
                0x360, 0x362, 
                0x369, # tag already exists
                0x36A, 0x36B, 0x36D,
                0x381, 
                0x380, # "db not selected"
                0x383,
                0x386,  # DB is being dropped?!
                0x503,
                0x510,  # vnode not in ready state
                0x600,
                1000  # REST catch-all error
            ]: 
            return True # These are the ALWAYS-ACCEPTABLE ones
1491 1492
        elif (errno in [ 0x0B ]) and gConfig.auto_start_service:
            return True # We may get "network unavilable" when restarting service
1493 1494 1495
        elif errno == 0x200 : # invalid SQL, we need to div in a bit more
            if msg.find("invalid column name") != -1:
                return True 
1496 1497 1498 1499
            elif msg.find("tags number not matched") != -1: # mismatched tags after modification
                return True
            elif msg.find("duplicated column names") != -1: # also alter table tag issues
                return True
1500 1501 1502 1503
        
        return False # Not an acceptable error


1504 1505
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
S
Shuduo Sang 已提交
1506
        self._workerThread = wt  # type: ignore
1507 1508

        te = wt.getTaskExecutor()
1509
        self._curStep = te.getCurStep()
S
Shuduo Sang 已提交
1510 1511
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))
1512 1513

        self._err = None
1514 1515
        self._execStats.beginTaskType(self.__class__.__name__)  # mark beginning
        errno2 = None
1516
        try:
S
Shuduo Sang 已提交
1517
            self._executeInternal(te, wt)  # TODO: no return value?
1518
        except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
1519 1520
            errno2 = err.errno if (
                err.errno > 0) else 0x80000000 + err.errno  # correct error scheme
1521
            if (gConfig.continue_on_exception):  # user choose to continue
1522
                self.logDebug("[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1523
                        errno2, err, wt.getDbConn().getLastSql()))
1524
                self._err = err
1525 1526
            elif self._isErrAcceptable(errno2, err.__str__()):
                self.logDebug("[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1527
                        errno2, err, wt.getDbConn().getLastSql()))
1528
                print("_", end="", flush=True)
S
Shuduo Sang 已提交
1529
                self._err = err
1530
            else: # not an acceptable error
1531 1532 1533
                errMsg = "[=] Unexpected Taos library exception ({}): errno=0x{:X}, msg: {}, SQL: {}".format(
                    self.__class__.__name__,
                    errno2, err, wt.getDbConn().getLastSql())
1534
                self.logDebug(errMsg)
S
Shuduo Sang 已提交
1535
                if gConfig.debug:
1536 1537
                    # raise # so that we see full stack
                    traceback.print_exc()
1538 1539
                print(
                    "\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
1540 1541 1542 1543
                    "----------------------------\n")
                # sys.exit(-1)
                self._err = err
                self._aborted = True
S
Shuduo Sang 已提交
1544
        except Exception as e:
S
Steven Li 已提交
1545
            self.logInfo("Non-TAOS exception encountered")
S
Shuduo Sang 已提交
1546
            self._err = e
S
Steven Li 已提交
1547
            self._aborted = True
1548
            traceback.print_exc()
1549
        except BaseException as e:
1550
            self.logInfo("Python base exception encountered")
1551
            self._err = e
1552
            self._aborted = True
S
Steven Li 已提交
1553
            traceback.print_exc()
S
Shuduo Sang 已提交
1554 1555 1556
        except BaseException:
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
1557
                    wt.getDbConn().getLastSql()))
1558
            raise
1559
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
S
Shuduo Sang 已提交
1560 1561 1562 1563

        self.logDebug("[X] task execution completed, {}, status: {}".format(
            self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        # TODO: merge with above.
1564
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess(), errno2)
S
Steven Li 已提交
1565

1566
    def execSql(self, sql):
1567
        return self._dbManager.execute(sql)
1568

S
Shuduo Sang 已提交
1569
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1570 1571
        return wt.execSql(sql)

S
Shuduo Sang 已提交
1572
    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1573 1574
        return wt.querySql(sql)

S
Shuduo Sang 已提交
1575
    def getQueryResult(self, wt: WorkerThread):  # execute an SQL on the worker thread
1576 1577 1578
        return wt.getQueryResult()


1579
class ExecutionStats:
1580
    def __init__(self):
S
Shuduo Sang 已提交
1581 1582
        # total/success times for a task
        self._execTimes: Dict[str, [int, int]] = {}
1583 1584 1585
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1586
        self._execStartTime = None
1587
        self._errors = {}
S
Shuduo Sang 已提交
1588 1589
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time
1590

1591 1592 1593
        self._failed = False
        self._failureReason = None

S
Steven Li 已提交
1594
    def __str__(self):
S
Shuduo Sang 已提交
1595 1596
        return "[ExecStats: _failed={}, _failureReason={}".format(
            self._failed, self._failureReason)
S
Steven Li 已提交
1597 1598

    def isFailed(self):
S
Shuduo Sang 已提交
1599
        return self._failed
S
Steven Li 已提交
1600

1601 1602 1603 1604 1605 1606
    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

1607
    def incExecCount(self, klassName, isSuccess, eno=None):  # TODO: add a lock here
1608 1609
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
S
Shuduo Sang 已提交
1610 1611
        t = self._execTimes[klassName]  # tuple for the data
        t[0] += 1  # index 0 has the "total" execution times
1612
        if isSuccess:
S
Shuduo Sang 已提交
1613
            t[1] += 1  # index 1 has the "success" execution times
1614 1615 1616 1617 1618
        if eno != None:             
            if klassName not in self._errors:
                self._errors[klassName] = {}
            errors = self._errors[klassName]
            errors[eno] = errors[eno]+1 if eno in errors else 1
1619 1620 1621

    def beginTaskType(self, klassName):
        with self._lock:
S
Shuduo Sang 已提交
1622 1623
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
1624 1625 1626 1627 1628
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
S
Shuduo Sang 已提交
1629
            if self._tasksInProgress == 0:  # all tasks have stopped
1630 1631 1632
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1633 1634 1635 1636
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1637
    def printStats(self):
S
Shuduo Sang 已提交
1638 1639 1640 1641 1642 1643
        logger.info(
            "----------------------------------------------------------------------")
        logger.info(
            "| Crash_Gen test {}, with the following stats:". format(
                "FAILED (reason: {})".format(
                    self._failureReason) if self._failed else "SUCCEEDED"))
1644
        logger.info("| Task Execution Times (success/total):")
1645
        execTimesAny = 0
S
Shuduo Sang 已提交
1646
        for k, n in self._execTimes.items():
1647
            execTimesAny += n[0]
1648 1649 1650 1651 1652 1653 1654 1655
            errStr = None
            if k in self._errors:
                errors = self._errors[k]
                # print("errors = {}".format(errors))
                errStrs = ["0x{:X}:{}".format(eno, n) for (eno, n) in errors.items()]
                # print("error strings = {}".format(errStrs))
                errStr = ", ".join(errStrs) 
            logger.info("|    {0:<24}: {1}/{2} (Errors: {3})".format(k, n[1], n[0], errStr))
S
Shuduo Sang 已提交
1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674

        logger.info(
            "| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info(
            "| Total Tasks In Progress at End: {}".format(
                self._tasksInProgress))
        logger.info(
            "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(
                self._accRunTime))
        logger.info(
            "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny))
        logger.info(
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
        logger.info(
            "| Top numbers written: {}".format(
                TaskExecutor.getBoundedList()))
        logger.info(
            "----------------------------------------------------------------------")
1675 1676 1677


class StateTransitionTask(Task):
1678 1679 1680 1681 1682
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1683
    @classmethod
S
Shuduo Sang 已提交
1684
    def getInfo(cls):  # each sub class should supply their own information
1685 1686
        raise RuntimeError("Overriding method expected")

S
Shuduo Sang 已提交
1687
    _endState = None
1688
    @classmethod
S
Shuduo Sang 已提交
1689
    def getEndState(cls):  # TODO: optimize by calling it fewer times
1690 1691
        raise RuntimeError("Overriding method expected")

1692 1693 1694
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1695

1696 1697 1698
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1699 1700

    @classmethod
1701 1702 1703
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1704

1705 1706
    @classmethod
    def getRegTableName(cls, i):
1707
        return "reg_table_{}".format(i)
1708

1709 1710
    def execute(self, wt: WorkerThread):
        super().execute(wt)
S
Shuduo Sang 已提交
1711 1712


1713
class TaskCreateDb(StateTransitionTask):
1714
    @classmethod
1715
    def getEndState(cls):
S
Shuduo Sang 已提交
1716
        return StateDbOnly()
1717

1718 1719 1720 1721
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1722
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1723 1724
        self.execWtSql(wt, "create database db")

1725

1726
class TaskDropDb(StateTransitionTask):
1727
    @classmethod
1728 1729
    def getEndState(cls):
        return StateEmpty()
1730

1731 1732 1733 1734
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1735
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1736
        self.execWtSql(wt, "drop database db")
S
Steven Li 已提交
1737
        logger.debug("[OPS] database dropped at {}".format(time.time()))
1738

S
Shuduo Sang 已提交
1739

1740
class TaskCreateSuperTable(StateTransitionTask):
1741
    @classmethod
1742 1743
    def getEndState(cls):
        return StateSuperTableOnly()
1744

1745 1746
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1747
        return state.canCreateFixedSuperTable()
1748

1749
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1750
        if not wt.dbInUse():  # no DB yet, to the best of our knowledge
1751 1752 1753
            logger.debug("Skipping task, no DB yet")
            return

1754
        sTable = self._dbManager.getFixedSuperTable()
1755
        # wt.execSql("use db")    # should always be in place
1756 1757
        sTable.create(wt.getDbConn(), {'ts':'timestamp', 'speed':'int'}, {'b':'binary(200)', 'f':'float'})
        # self.execWtSql(wt,"create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
S
Shuduo Sang 已提交
1758 1759
        # No need to create the regular tables, INSERT will do that
        # automatically
1760

S
Steven Li 已提交
1761

1762 1763 1764 1765
class TdSuperTable:
    def __init__(self, stName):
        self._stName = stName

1766 1767 1768 1769 1770 1771 1772 1773
    def create(self, dbc, cols: dict, tags: dict):
        sql = "CREATE TABLE db.{} ({}) TAGS ({})".format(
            self._stName,
            ",".join(['%s %s'%(k,v) for (k,v) in cols.items()]),
            ",".join(['%s %s'%(k,v) for (k,v) in tags.items()])
            )
        dbc.execute(sql)        

1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791
    def getRegTables(self, dbc: DbConn):
        try:
            dbc.query("select TBNAME from db.{}".format(self._stName))  # TODO: analyze result set later            
        except taos.error.ProgrammingError as err:                    
            errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno
            logger.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err))
            raise

        qr = dbc.getQueryResult()
        return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation

    def hasRegTables(self, dbc: DbConn):
        return dbc.query("SELECT * FROM db.{}".format(self._stName)) > 0

    def ensureTable(self, dbc: DbConn, regTableName: str):
        sql = "select tbname from {} where tbname in ('{}')".format(self._stName, regTableName)
        if dbc.query(sql) >= 1 : # reg table exists already
            return
1792 1793
        sql = "CREATE TABLE {} USING {} tags ({})".format(
            regTableName, self._stName, self._getTagStrForSql(dbc)
1794 1795 1796
        )
        dbc.execute(sql)

1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841
    def _getTagStrForSql(self, dbc) :
        tags = self._getTags(dbc)
        tagStrs = []
        for tagName in tags: 
            tagType = tags[tagName]
            if tagType == 'BINARY':
                tagStrs.append("'Beijing-Shanghai-LosAngeles'")
            elif tagType == 'FLOAT':
                tagStrs.append('9.9')
            elif tagType == 'INT':
                tagStrs.append('88')
            else:
                raise RuntimeError("Unexpected tag type: {}".format(tagType))
        return ", ".join(tagStrs)

    def _getTags(self, dbc) -> dict:
        dbc.query("DESCRIBE {}".format(self._stName))
        stCols = dbc.getQueryResult()
        # print(stCols)
        ret = {row[0]:row[1] for row in stCols if row[3]=='TAG'} # name:type
        # print("Tags retrieved: {}".format(ret))
        return ret

    def addTag(self, dbc, tagName, tagType):
        if tagName in self._getTags(dbc): # already 
            return
        # sTable.addTag("extraTag", "int")
        sql = "alter table db.{} add tag {} {}".format(self._stName, tagName, tagType)
        dbc.execute(sql)

    def dropTag(self, dbc, tagName):
        if not tagName in self._getTags(dbc): # don't have this tag
            return
        sql = "alter table db.{} drop tag {}".format(self._stName, tagName)
        dbc.execute(sql)

    def changeTag(self, dbc, oldTag, newTag):
        tags = self._getTags(dbc)
        if not oldTag in tags: # don't have this tag
            return
        if newTag in tags: # already have this tag
            return
        sql = "alter table db.{} change tag {} {}".format(self._stName, oldTag, newTag)
        dbc.execute(sql)

1842
class TaskReadData(StateTransitionTask):
1843
    @classmethod
1844
    def getEndState(cls):
S
Shuduo Sang 已提交
1845
        return None  # meaning doesn't affect state
1846

1847 1848 1849 1850
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1851
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1852
        sTable = self._dbManager.getFixedSuperTable()
1853

S
Shuduo Sang 已提交
1854 1855
        if random.randrange(
                5) == 0:  # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
1856 1857
            wt.getDbConn().close()
            wt.getDbConn().open()
1858
        
1859
        for rTbName in sTable.getRegTables(wt.getDbConn()):  # regular tables
1860 1861 1862 1863 1864 1865 1866 1867
            aggExpr = Dice.choice(['*', 'count(*)', 'avg(speed)', 
                # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable
                'sum(speed)', 'stddev(speed)', 
                'min(speed)', 'max(speed)', 'first(speed)', 'last(speed)']) # TODO: add more from 'top'
            try:
                self.execWtSql(wt, "select {} from db.{}".format(aggExpr, rTbName))
            except taos.error.ProgrammingError as err:                    
                errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno
1868
                logger.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, wt.getDbConn().getLastSql()))
1869
                raise
S
Shuduo Sang 已提交
1870

1871
class TaskDropSuperTable(StateTransitionTask):
1872
    @classmethod
1873
    def getEndState(cls):
S
Shuduo Sang 已提交
1874
        return StateDbOnly()
1875

1876 1877
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1878
        return state.canDropFixedSuperTable()
1879

1880
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1881 1882 1883 1884 1885 1886 1887
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized
        # sequence
        if Dice.throw(2) == 0:
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
1888
            isSuccess = True
S
Shuduo Sang 已提交
1889 1890 1891
            for i in tblSeq:
                regTableName = self.getRegTableName(
                    i)  # "db.reg_table_{}".format(i)
1892
                try:
S
Shuduo Sang 已提交
1893 1894 1895 1896 1897 1898 1899
                    self.execWtSql(wt, "drop table {}".format(
                        regTableName))  # nRows always 0, like MySQL
                except taos.error.ProgrammingError as err:
                    # correcting for strange error number scheme
                    errno2 = err.errno if (
                        err.errno > 0) else 0x80000000 + err.errno
                    if (errno2 in [0x362]):  # mnode invalid table name
1900
                        isSuccess = False
S
Shuduo Sang 已提交
1901 1902 1903
                        logger.debug(
                            "[DB] Acceptable error when dropping a table")
                    continue  # try to delete next regular table
1904 1905

                if (not tickOutput):
S
Shuduo Sang 已提交
1906 1907
                    tickOutput = True  # Print only one time
                    if isSuccess:
1908 1909
                        print("d", end="", flush=True)
                    else:
S
Shuduo Sang 已提交
1910
                        print("f", end="", flush=True)
1911 1912

        # Drop the super table itself
S
Shuduo Sang 已提交
1913
        tblName = self._dbManager.getFixedSuperTableName()
1914
        self.execWtSql(wt, "drop table db.{}".format(tblName))
1915

S
Shuduo Sang 已提交
1916

1917 1918 1919
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
S
Shuduo Sang 已提交
1920
        return None  # meaning doesn't affect state
1921 1922 1923

    @classmethod
    def canBeginFrom(cls, state: AnyState):
S
Shuduo Sang 已提交
1924
        return state.canDropFixedSuperTable()  # if we can drop it, we can alter tags
1925 1926

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1927 1928 1929
        # tblName = self._dbManager.getFixedSuperTableName()
        dbc = wt.getDbConn()
        sTable = self._dbManager.getFixedSuperTable()
1930
        dice = Dice.throw(4)
S
Shuduo Sang 已提交
1931
        if dice == 0:
1932 1933
            sTable.addTag(dbc, "extraTag", "int")
            # sql = "alter table db.{} add tag extraTag int".format(tblName)
S
Shuduo Sang 已提交
1934
        elif dice == 1:
1935 1936
            sTable.dropTag(dbc, "extraTag")
            # sql = "alter table db.{} drop tag extraTag".format(tblName)
S
Shuduo Sang 已提交
1937
        elif dice == 2:
1938 1939
            sTable.dropTag(dbc, "newTag")
            # sql = "alter table db.{} drop tag newTag".format(tblName)
S
Shuduo Sang 已提交
1940
        else:  # dice == 3
1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961
            sTable.changeTag(dbc, "extraTag", "newTag")
            # sql = "alter table db.{} change tag extraTag newTag".format(tblName)

class TaskRestartService(StateTransitionTask):
    _isRunning = False
    _classLock = threading.Lock()

    @classmethod
    def getEndState(cls):
        return None  # meaning doesn't affect state

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        if gConfig.auto_start_service:
            return state.canDropFixedSuperTable()  # Basicallly when we have the super table
        return False # don't run this otherwise

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        if not gConfig.auto_start_service: # only execute when we are in -a mode
            print("_a", end="", flush=True)
            return
1962

1963 1964 1965 1966 1967 1968 1969 1970 1971 1972
        with self._classLock:
            if self._isRunning:
                print("Skipping restart task, another running already")
                return
            self._isRunning = True

        if Dice.throw(50) == 0: # 1 in N chance
            dbc = wt.getDbConn()
            dbc.execute("show databases") # simple delay, align timing with other workers
            gSvcMgr.restart()
1973

1974
        self._isRunning = False
S
Shuduo Sang 已提交
1975

1976
class TaskAddData(StateTransitionTask):
S
Shuduo Sang 已提交
1977 1978
    # Track which table is being actively worked on
    activeTable: Set[int] = set()
1979

S
Shuduo Sang 已提交
1980 1981
    # We use these two files to record operations to DB, useful for power-off
    # tests
1982 1983 1984 1985 1986
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
S
Shuduo Sang 已提交
1987 1988 1989 1990
        if gConfig.record_ops:
            if (cls.fAddLogReady is None):
                logger.info(
                    "Recording in a file operations to be performed...")
1991
                cls.fAddLogReady = open("add_log_ready.txt", "w")
S
Shuduo Sang 已提交
1992
            if (cls.fAddLogDone is None):
1993 1994
                logger.info("Recording in a file operations completed...")
                cls.fAddLogDone = open("add_log_done.txt", "w")
1995

1996
    @classmethod
1997 1998
    def getEndState(cls):
        return StateHasData()
1999 2000 2001 2002

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
S
Shuduo Sang 已提交
2003

2004
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
2005
        ds = self._dbManager # Quite DANGEROUS here, may result in multi-thread client access
2006
        tblSeq = list(range(
S
Shuduo Sang 已提交
2007 2008 2009 2010
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
        for i in tblSeq:
            if (i in self.activeTable):  # wow already active
2011
                print("x", end="", flush=True) # concurrent insertion
2012
            else:
S
Shuduo Sang 已提交
2013
                self.activeTable.add(i)  # marking it active
2014 2015 2016
            
            sTable = ds.getFixedSuperTable()
            regTableName = self.getRegTableName(i)  # "db.reg_table_{}".format(i)
2017
            sTable.ensureTable(wt.getDbConn(), regTableName)  # Ensure the table exists           
2018 2019
           
            for j in range(self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
S
Shuduo Sang 已提交
2020
                nextInt = ds.getNextInt()
2021 2022
                if gConfig.record_ops:
                    self.prepToRecordOps()
2023
                    self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName))
2024 2025
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
2026
                sql = "insert into {} values ('{}', {});".format( # removed: tags ('{}', {})
S
Shuduo Sang 已提交
2027
                    regTableName,
2028 2029
                    # ds.getFixedSuperTableName(),
                    # ds.getNextBinary(), ds.getNextFloat(),
2030
                    ds.getNextTick(), nextInt)
S
Shuduo Sang 已提交
2031 2032 2033
                self.execWtSql(wt, sql)
                # Successfully wrote the data into the DB, let's record it
                # somehow
2034
                te.recordDataMark(nextInt)
2035
                if gConfig.record_ops:
S
Shuduo Sang 已提交
2036 2037 2038
                    self.fAddLogDone.write(
                        "Wrote {} to {}\n".format(
                            nextInt, regTableName))
2039 2040
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
S
Shuduo Sang 已提交
2041
            self.activeTable.discard(i)  # not raising an error, unlike remove
2042 2043


S
Steven Li 已提交
2044 2045
# Deterministic random number generator
class Dice():
S
Shuduo Sang 已提交
2046
    seeded = False  # static, uninitialized
S
Steven Li 已提交
2047 2048

    @classmethod
S
Shuduo Sang 已提交
2049
    def seed(cls, s):  # static
S
Steven Li 已提交
2050
        if (cls.seeded):
S
Shuduo Sang 已提交
2051 2052
            raise RuntimeError(
                "Cannot seed the random generator more than once")
S
Steven Li 已提交
2053 2054 2055 2056 2057
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
S
Shuduo Sang 已提交
2058
    def verifyRNG(cls):  # Verify that the RNG is determinstic
S
Steven Li 已提交
2059 2060 2061 2062
        random.seed(0)
        x1 = random.randrange(0, 1000)
        x2 = random.randrange(0, 1000)
        x3 = random.randrange(0, 1000)
S
Shuduo Sang 已提交
2063
        if (x1 != 864 or x2 != 394 or x3 != 776):
S
Steven Li 已提交
2064 2065 2066
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
S
Shuduo Sang 已提交
2067
    def throw(cls, stop):  # get 0 to stop-1
2068
        return cls.throwRange(0, stop)
S
Steven Li 已提交
2069 2070

    @classmethod
S
Shuduo Sang 已提交
2071 2072
    def throwRange(cls, start, stop):  # up to stop-1
        if (not cls.seeded):
S
Steven Li 已提交
2073
            raise RuntimeError("Cannot throw dice before seeding it")
2074
        return random.randrange(start, stop)
S
Steven Li 已提交
2075

2076 2077 2078 2079
    @classmethod
    def choice(cls, cList):
        return random.choice(cList)

S
Steven Li 已提交
2080

S
Steven Li 已提交
2081 2082
class LoggingFilter(logging.Filter):
    def filter(self, record: logging.LogRecord):
S
Shuduo Sang 已提交
2083 2084
        if (record.levelno >= logging.INFO):
            return True  # info or above always log
S
Steven Li 已提交
2085

S
Steven Li 已提交
2086 2087 2088 2089
        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
S
Steven Li 已提交
2090 2091
        return True

S
Shuduo Sang 已提交
2092 2093

class MyLoggingAdapter(logging.LoggerAdapter):
2094 2095 2096 2097
    def process(self, msg, kwargs):
        return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
        # return '[%s] %s' % (self.extra['connid'], msg), kwargs

S
Shuduo Sang 已提交
2098 2099

class SvcManager:
2100
    def __init__(self):
2101
        print("Starting TDengine Service Manager")
2102 2103 2104
        # signal.signal(signal.SIGTERM, self.sigIntHandler) # Moved to MainExec
        # signal.signal(signal.SIGINT, self.sigIntHandler)
        # signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
2105

2106
        self.inSigHandler = False
2107 2108
        # self._status = MainExec.STATUS_RUNNING # set inside
        # _startTaosService()
2109
        self.svcMgrThread = None
2110 2111
        self._lock = threading.Lock()
        self._isRestarting = False
2112

2113 2114 2115 2116 2117 2118 2119 2120
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Service Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Restart")
            # Remember to update the if range below
S
Shuduo Sang 已提交
2121
            # print("Enter Choice: ", end="", flush=True)
2122 2123 2124
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
S
Shuduo Sang 已提交
2125 2126 2127
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
2128
            print("Invalid choice, please try again.")
S
Shuduo Sang 已提交
2129
            choice = ""  # reset
2130 2131
        return choice

S
Shuduo Sang 已提交
2132
    def sigUsrHandler(self, signalNumber, frame):
2133
        print("Interrupting main thread execution upon SIGUSR1")
2134
        if self.inSigHandler:  # already
2135
            print("Ignoring repeated SIG...")
S
Shuduo Sang 已提交
2136
            return  # do nothing if it's already not running
2137
        self.inSigHandler = True
2138 2139

        choice = self._doMenu()
S
Shuduo Sang 已提交
2140 2141 2142 2143 2144
        if choice == "1":
            # TODO: can the sub-process be blocked due to us not reading from
            # queue?
            self.sigHandlerResume()
        elif choice == "2":
2145
            self.stopTaosService()
2146 2147
        elif choice == "3": # Restart
            self.restart()
2148 2149
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))
2150

2151 2152
        self.inSigHandler = False

2153
    def sigIntHandler(self, signalNumber, frame):
2154
        print("SvcManager: INT Signal Handler starting...")
2155
        if self.inSigHandler:
2156 2157
            print("Ignoring repeated SIG_INT...")
            return
2158
        self.inSigHandler = True
2159

S
Shuduo Sang 已提交
2160
        self.stopTaosService()
2161
        print("SvcManager: INT Signal Handler returning...")
2162
        self.inSigHandler = False
2163

S
Shuduo Sang 已提交
2164
    def sigHandlerResume(self):
2165
        print("Resuming TDengine service manager thread (main thread)...\n\n")
2166

2167
    def _checkServiceManagerThread(self):
2168 2169 2170 2171
        if self.svcMgrThread:  # valid svc mgr thread
            if self.svcMgrThread.isStopped():  # done?
                self.svcMgrThread.procIpcBatch()  # one last time. TODO: appropriate?
                self.svcMgrThread = None  # no more
2172 2173

    def _procIpcAll(self):
2174 2175 2176 2177 2178 2179
        while self.isRunning() or self.isRestarting() :  # for as long as the svc mgr thread is still here
            if self.isRunning():
                self.svcMgrThread.procIpcBatch()  # regular processing,
                self._checkServiceManagerThread()
            elif self.isRetarting():
                print("Service restarting...")
2180 2181 2182 2183 2184
            time.sleep(0.5)  # pause, before next round
        print(
            "Service Manager Thread (with subprocess) has ended, main thread now exiting...")

    def startTaosService(self):
2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203
        with self._lock:
            if self.svcMgrThread:
                raise RuntimeError("Cannot start TAOS service when one may already be running")

            # Find if there's already a taosd service, and then kill it
            for proc in psutil.process_iter():
                if proc.name() == 'taosd':
                    print("Killing an existing TAOSD process in 2 seconds... press CTRL-C to interrupe")
                    time.sleep(2.0)
                    proc.kill()
                # print("Process: {}".format(proc.name()))
            
            self.svcMgrThread = ServiceManagerThread()  # create the object
            self.svcMgrThread.start()
            print("TAOS service started, printing out output...")
            self.svcMgrThread.procIpcBatch(
                trimToTarget=10,
                forceOutput=True)  # for printing 10 lines
            print("TAOS service started")
2204 2205

    def stopTaosService(self, outputLines=20):
2206 2207 2208 2209
        with self._lock:
            if not self.isRunning():
                logger.warning("Cannot stop TAOS service, not running")
                return
2210

2211 2212 2213 2214 2215 2216 2217 2218 2219
            print("Terminating Service Manager Thread (SMT) execution...")
            self.svcMgrThread.stop()
            if self.svcMgrThread.isStopped():
                self.svcMgrThread.procIpcBatch(outputLines)  # one last time
                self.svcMgrThread = None
                print("----- End of TDengine Service Output -----\n")
                print("SMT execution terminated")
            else:
                print("WARNING: SMT did not terminate as expected")
2220 2221 2222

    def run(self):
        self.startTaosService()
2223
        self._procIpcAll()  # pump/process all the messages, may encounter SIG + restart
2224
        if self.isRunning():  # if sig handler hasn't destroyed it by now
2225 2226
            self.stopTaosService()  # should have started already

2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240
    def restart(self):
        if self._isRestarting:
            logger.warning("Cannot restart service when it's already restarting")
            return

        self._isRestarting = True
        if self.isRunning():
            self.stopTaosService()
        else:
            logger.warning("Service not running when restart requested")

        self.startTaosService()
        self._isRestarting = False

2241 2242
    def isRunning(self):
        return self.svcMgrThread != None
2243

2244 2245 2246
    def isRestarting(self):
        return self._isRestarting

2247 2248 2249 2250 2251
class ServiceManagerThread:
    MAX_QUEUE_SIZE = 10000

    def __init__(self):
        self._tdeSubProcess = None
2252
        self._thread = None
2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269
        self._status = None

    def getStatus(self):
        return self._status

    def isRunning(self):
        # return self._thread and self._thread.is_alive()
        return self._status == MainExec.STATUS_RUNNING

    def isStopping(self):
        return self._status == MainExec.STATUS_STOPPING

    def isStopped(self):
        return self._status == MainExec.STATUS_STOPPED

    # Start the thread (with sub process), and wait for the sub service
    # to become fully operational
2270 2271
    def start(self):
        if self._thread:
2272
            raise RuntimeError("Unexpected _thread")
2273
        if self._tdeSubProcess:
2274 2275 2276 2277
            raise RuntimeError("TDengine sub process already created/running")

        self._status = MainExec.STATUS_STARTING

2278
        self._tdeSubProcess = TdeSubProcess()
2279 2280 2281 2282
        self._tdeSubProcess.start()

        self._ipcQueue = Queue()
        self._thread = threading.Thread(
2283
            target=self.svcOutputReader,
2284
            args=(self._tdeSubProcess.getStdOut(), self._ipcQueue))
2285
        self._thread.daemon = True  # thread dies with the program
2286 2287 2288
        self._thread.start()

        # wait for service to start
2289
        for i in range(0, 10):
2290 2291 2292
            time.sleep(1.0)
            # self.procIpcBatch() # don't pump message during start up
            print("_zz_", end="", flush=True)
2293
            if self._status == MainExec.STATUS_RUNNING:
2294
                logger.info("[] TDengine service READY to process requests")
2295 2296
                return  # now we've started
        # TODO: handle this better?
2297
        self.procIpcBatch(20, True) # display output before cronking out, trim to last 20 msgs, force output
2298
        raise RuntimeError("TDengine service did not start successfully")
2299 2300 2301 2302 2303 2304

    def stop(self):
        # can be called from both main thread or signal handler
        print("Terminating TDengine service running as the sub process...")
        if self.isStopped():
            print("Service already stopped")
2305
            return
2306 2307 2308
        if self.isStopping():
            print("Service is already being stopped")
            return
2309 2310 2311 2312
        # Linux will send Control-C generated SIGINT to the TDengine process
        # already, ref:
        # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes
        if not self._tdeSubProcess:
2313
            raise RuntimeError("sub process object missing")
2314

2315 2316 2317
        self._status = MainExec.STATUS_STOPPING
        self._tdeSubProcess.stop()

2318 2319 2320 2321
        if self._tdeSubProcess.isRunning():  # still running
            print(
                "FAILED to stop sub process, it is still running... pid = {}".format(
                    self.subProcess.pid))
2322
        else:
2323 2324 2325
            self._tdeSubProcess = None  # not running any more
            self.join()  # stop the thread, change the status, etc.

2326 2327 2328
    def join(self):
        # TODO: sanity check
        if not self.isStopping():
2329 2330 2331
            raise RuntimeError(
                "Unexpected status when ending svc mgr thread: {}".format(
                    self._status))
2332

2333
        if self._thread:
2334
            self._thread.join()
2335
            self._thread = None
2336
            self._status = MainExec.STATUS_STOPPED
S
Shuduo Sang 已提交
2337
        else:
2338
            print("Joining empty thread, doing nothing")
2339 2340 2341

    def _trimQueue(self, targetSize):
        if targetSize <= 0:
2342
            return  # do nothing
2343
        q = self._ipcQueue
2344
        if (q.qsize() <= targetSize):  # no need to trim
2345 2346 2347 2348
            return

        logger.debug("Triming IPC queue to target size: {}".format(targetSize))
        itemsToTrim = q.qsize() - targetSize
2349
        for i in range(0, itemsToTrim):
2350 2351 2352
            try:
                q.get_nowait()
            except Empty:
2353 2354
                break  # break out of for loop, no more trimming

2355
    TD_READY_MSG = "TDengine is initialized successfully"
S
Shuduo Sang 已提交
2356

2357 2358
    def procIpcBatch(self, trimToTarget=0, forceOutput=False):
        self._trimQueue(trimToTarget)  # trim if necessary
S
Shuduo Sang 已提交
2359 2360
        # Process all the output generated by the underlying sub process,
        # managed by IO thread
2361
        print("<", end="", flush=True)
S
Shuduo Sang 已提交
2362 2363
        while True:
            try:
2364
                line = self._ipcQueue.get_nowait()  # getting output at fast speed
2365
                self._printProgress("_o")
2366 2367 2368
            except Empty:
                # time.sleep(2.3) # wait only if there's no output
                # no more output
2369
                print(".>", end="", flush=True)
S
Shuduo Sang 已提交
2370
                return  # we are done with THIS BATCH
2371
            else:  # got line, printing out
2372 2373 2374 2375 2376 2377 2378
                if forceOutput:
                    logger.info(line)
                else:
                    logger.debug(line)
        print(">", end="", flush=True)

    _ProgressBars = ["--", "//", "||", "\\\\"]
2379

2380 2381
    def _printProgress(self, msg):  # TODO: assuming 2 chars
        print(msg, end="", flush=True)
2382 2383 2384
        pBar = self._ProgressBars[Dice.throw(4)]
        print(pBar, end="", flush=True)
        print('\b\b\b\b', end="", flush=True)
2385

2386 2387 2388
    def svcOutputReader(self, out: IO, queue):
        # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # print("This is the svcOutput Reader...")
2389
        # for line in out :
2390 2391 2392 2393
        for line in iter(out.readline, b''):
            # print("Finished reading a line: {}".format(line))
            # print("Adding item to queue...")
            line = line.decode("utf-8").rstrip()
2394 2395
            # This might block, and then causing "out" buffer to block
            queue.put(line)
2396 2397
            self._printProgress("_i")

2398 2399 2400
            if self._status == MainExec.STATUS_STARTING:  # we are starting, let's see if we have started
                if line.find(self.TD_READY_MSG) != -1:  # found
                    self._status = MainExec.STATUS_RUNNING
2401 2402

            # Trim the queue if necessary: TODO: try this 1 out of 10 times
2403
            self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10)  # trim to 90% size
2404

2405 2406 2407
            if self.isStopping():  # TODO: use thread status instead
                # WAITING for stopping sub process to finish its outptu
                print("_w", end="", flush=True)
2408 2409

            # queue.put(line)
2410 2411
        # meaning sub process must have died
        print("\nNo more output from IO thread managing TDengine service")
2412 2413
        out.close()

2414 2415

class TdeSubProcess:
2416 2417 2418 2419 2420
    def __init__(self):
        self.subProcess = None

    def getStdOut(self):
        return self.subProcess.stdout
2421

2422
    def isRunning(self):
2423
        return self.subProcess is not None
2424

S
Shuduo Sang 已提交
2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath
2439

2440
    def start(self):
2441
        ON_POSIX = 'posix' in sys.builtin_module_names
S
Shuduo Sang 已提交
2442

2443 2444 2445
        taosdPath = self.getBuildPath() + "/build/bin/taosd"
        cfgPath = self.getBuildPath() + "/test/cfg"

2446 2447 2448 2449 2450 2451 2452 2453 2454
        # Delete the log files
        logPath = self.getBuildPath() + "/test/log"
        # ref: https://stackoverflow.com/questions/1995373/deleting-all-files-in-a-directory-with-python/1995397
        filelist = [ f for f in os.listdir(logPath) ] # if f.endswith(".bak") ]
        for f in filelist:
            filePath = os.path.join(logPath, f)
            print("Removing log file: {}".format(filePath))
            os.remove(filePath)

S
Shuduo Sang 已提交
2455
        svcCmd = [taosdPath, '-c', cfgPath]
2456
        # svcCmd = ['vmstat', '1']
S
Shuduo Sang 已提交
2457
        if self.subProcess:  # already there
2458 2459 2460
            raise RuntimeError("Corrupt process state")

        self.subProcess = subprocess.Popen(
S
Shuduo Sang 已提交
2461 2462
            svcCmd,
            stdout=subprocess.PIPE,
2463
            # bufsize=1, # not supported in binary mode
S
Shuduo Sang 已提交
2464
            close_fds=ON_POSIX)  # had text=True, which interferred with reading EOF
2465

2466 2467 2468
    def stop(self):
        if not self.subProcess:
            print("Sub process already stopped")
2469 2470 2471
            return

        retCode = self.subProcess.poll()
S
Shuduo Sang 已提交
2472
        if retCode:  # valid return code, process ended
2473
            self.subProcess = None
S
Shuduo Sang 已提交
2474 2475
        else:  # process still alive, let's interrupt it
            print(
2476
                "Sub process is running, sending SIG_INT and waiting for it to terminate...")
S
Shuduo Sang 已提交
2477 2478 2479 2480
            # sub process should end, then IPC queue should end, causing IO
            # thread to end
            self.subProcess.send_signal(signal.SIGINT)
            try:
2481 2482 2483 2484
                self.subProcess.wait(10)
            except subprocess.TimeoutExpired as err:
                print("Time out waiting for TDengine service process to exit")
            else:
2485
                print("TDengine service process terminated successfully from SIG_INT")
2486 2487
                self.subProcess = None

2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515
class ThreadStacks: # stack info for all threads
    def __init__(self):
        self._allStacks = {}
        allFrames = sys._current_frames()
        for th in threading.enumerate():                        
            stack = traceback.extract_stack(allFrames[th.ident])     
            self._allStacks[th.native_id] = stack

    def print(self, filteredEndName = None, filterInternal = False):
        for thNid, stack in self._allStacks.items(): # for each thread            
            lastFrame = stack[-1]
            if filteredEndName: # we need to filter out stacks that match this name                
                if lastFrame.name == filteredEndName : # end did not match
                    continue
            if filterInternal:
                if lastFrame.name in ['wait', 'invoke_excepthook', 
                    '_wait', # The Barrier exception
                    'svcOutputReader', # the svcMgr thread
                    '__init__']: # the thread that extracted the stack
                    continue # ignore
            # Now print
            print("\n<----- Thread Info for ID: {}".format(thNid))
            for frame in stack:
                # print(frame)
                print("File {filename}, line {lineno}, in {name}".format(
                    filename=frame.filename, lineno=frame.lineno, name=frame.name))
                print("    {}".format(frame.line))
            print("-----> End of Thread Info\n")
S
Shuduo Sang 已提交
2516

2517 2518 2519
class ClientManager:
    def __init__(self):
        print("Starting service manager")
2520 2521
        # signal.signal(signal.SIGTERM, self.sigIntHandler)
        # signal.signal(signal.SIGINT, self.sigIntHandler)
2522

2523
        self._status = MainExec.STATUS_RUNNING
2524 2525
        self.tc = None

2526 2527
        self.inSigHandler = False

2528
    def sigIntHandler(self, signalNumber, frame):
2529
        if self._status != MainExec.STATUS_RUNNING:
2530 2531 2532
            print("Repeated SIGINT received, forced exit...")
            # return  # do nothing if it's already not running
            sys.exit(-1)
2533
        self._status = MainExec.STATUS_STOPPING  # immediately set our status
2534

2535
        print("ClientManager: Terminating program...")
2536 2537
        self.tc.requestToStop()

2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Client Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Show Threads")
            # Remember to update the if range below
            # print("Enter Choice: ", end="", flush=True)
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
            print("Invalid choice, please try again.")
            choice = ""  # reset
        return choice

    def sigUsrHandler(self, signalNumber, frame):
        print("Interrupting main thread execution upon SIGUSR1")
        if self.inSigHandler:  # already
            print("Ignoring repeated SIG_USR1...")
            return  # do nothing if it's already not running
        self.inSigHandler = True

        choice = self._doMenu()
        if choice == "1":
            print("Resuming execution...")
            time.sleep(1.0)
        elif choice == "2":
            print("Not implemented yet")
            time.sleep(1.0)
        elif choice == "3":
            ts = ThreadStacks()
            ts.print()
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))

        self.inSigHandler = False

S
Shuduo Sang 已提交
2579
    def _printLastNumbers(self):  # to verify data durability
2580 2581
        dbManager = DbManager(resetDb=False)
        dbc = dbManager.getDbConn()
S
Shuduo Sang 已提交
2582
        if dbc.query("show databases") == 0:  # no databae
2583
            return
S
Shuduo Sang 已提交
2584
        if dbc.query("show tables") == 0:  # no tables
S
Steven Li 已提交
2585
            return
2586 2587

        dbc.execute("use db")
S
Shuduo Sang 已提交
2588
        sTbName = dbManager.getFixedSuperTableName()
2589 2590

        # get all regular tables
S
Shuduo Sang 已提交
2591 2592
        # TODO: analyze result set later
        dbc.query("select TBNAME from db.{}".format(sTbName))
2593 2594 2595
        rTables = dbc.getQueryResult()

        bList = TaskExecutor.BoundedList()
S
Shuduo Sang 已提交
2596
        for rTbName in rTables:  # regular tables
2597 2598
            dbc.query("select speed from db.{}".format(rTbName[0]))
            numbers = dbc.getQueryResult()
S
Shuduo Sang 已提交
2599
            for row in numbers:
2600 2601 2602 2603 2604 2605
                # print("<{}>".format(n), end="", flush=True)
                bList.add(row[0])

        print("Top numbers in DB right now: {}".format(bList))
        print("TDengine client execution is about to start in 2 seconds...")
        time.sleep(2.0)
S
Shuduo Sang 已提交
2606
        dbManager = None  # release?
2607 2608 2609 2610

    def prepare(self):
        self._printLastNumbers()

2611
    def run(self, svcMgr):
2612 2613
        self._printLastNumbers()

S
Shuduo Sang 已提交
2614
        dbManager = DbManager()  # Regular function
2615
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
2616
        self.tc = ThreadCoordinator(thPool, dbManager)
S
Shuduo Sang 已提交
2617

2618
        self.tc.run()
S
Steven Li 已提交
2619 2620
        # print("exec stats: {}".format(self.tc.getExecStats()))
        # print("TC failed = {}".format(self.tc.isFailed()))
2621
        if svcMgr: # gConfig.auto_start_service:
2622
            svcMgr.stopTaosService()
2623 2624
        # Print exec status, etc., AFTER showing messages from the server
        self.conclude()
S
Steven Li 已提交
2625
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2626 2627
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
        return 1 if self.tc.isFailed() else 0
2628 2629 2630

    def conclude(self):
        self.tc.printStats()
S
Shuduo Sang 已提交
2631
        self.tc.getDbManager().cleanUp()
2632 2633

class MainExec:
2634 2635 2636
    STATUS_STARTING = 1
    STATUS_RUNNING = 2
    STATUS_STOPPING = 3
2637
    STATUS_STOPPED = 4
2638

2639 2640 2641
    def __init__(self):        
        self._clientMgr = None
        self._svcMgr = None
2642

2643 2644 2645
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT,  self.sigIntHandler)
        signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
2646

2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659
    def sigUsrHandler(self, signalNumber, frame):
        if self._clientMgr:
            self._clientMgr.sigUsrHandler(signalNumber, frame)
        elif self._svcMgr: # Only if no client mgr, we are running alone
            self._svcMgr.sigUsrHandler(signalNumber, frame)
        
    def sigIntHandler(self, signalNumber, frame):
        if self._svcMgr:
            self._svcMgr.sigIntHandler(signalNumber, frame)
        if self._clientMgr:
            self._clientMgr.sigIntHandler(signalNumber, frame)

    def runClient(self):
2660
        global gSvcMgr
2661 2662
        if gConfig.auto_start_service:
            self._svcMgr = SvcManager()
2663
            gSvcMgr = self._svcMgr # hack alert
2664 2665 2666
            self._svcMgr.startTaosService() # we start, don't run
        
        self._clientMgr = ClientManager()
2667 2668 2669 2670 2671 2672 2673
        ret = None
        try: 
            ret = self._clientMgr.run(self._svcMgr) # stop TAOS service inside
        except requests.exceptions.ConnectionError as err:
            logger.warning("Failed to open REST connection to DB")
            # don't raise
        return ret
2674 2675

    def runService(self):
2676
        global gSvcMgr
2677
        self._svcMgr = SvcManager()
2678 2679
        gSvcMgr = self._svcMgr # save it in a global variable TODO: hack alert

2680
        self._svcMgr.run() # run to some end state
2681 2682
        self._svcMgr = None 
        gSvcMgr = None        
2683 2684

    def runTemp(self):  # for debugging purposes
2685 2686
        # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
S
Shuduo Sang 已提交
2687
        # sTbName = dbState.getFixedSuperTableName()
2688 2689
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
S
Shuduo Sang 已提交
2690
        #     dbc.execute("use db")
2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))
S
Shuduo Sang 已提交
2702

2703 2704 2705 2706 2707 2708 2709 2710
        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
S
Shuduo Sang 已提交
2711
        #                 rTbName[0],
2712 2713
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
S
Shuduo Sang 已提交
2714
        #         for rTbName in rTables : # regular tables
2715
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
S
Shuduo Sang 已提交
2716
        #         logger.info("Initial READING operation is successful")
2717
        #     except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
2718 2719
        #         logger.info("Initial WRITE/READ error: {}".format(err))

2720 2721 2722
        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
S
Shuduo Sang 已提交
2723
        #     rows = dbc.query("show databases")
2724
        #     print("Rows: {}, time={}".format(rows, time.time()))
S
Shuduo Sang 已提交
2725 2726
        return

S
Steven Li 已提交
2727

2728
def main():
S
Shuduo Sang 已提交
2729 2730
    # Super cool Python argument library:
    # https://docs.python.org/3/library/argparse.html
2731 2732 2733 2734 2735 2736 2737 2738 2739
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
2740

2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761
    # parser.add_argument('-a', '--auto-start-service', action='store_true',                        
    #                     help='Automatically start/stop the TDengine service (default: false)')
    # parser.add_argument('-c', '--connector-type', action='store', default='native', type=str,
    #                     help='Connector type to use: native, rest, or mixed (default: 10)')
    # parser.add_argument('-d', '--debug', action='store_true',                        
    #                     help='Turn on DEBUG mode for more logging (default: false)')
    # parser.add_argument('-e', '--run-tdengine', action='store_true',                        
    #                     help='Run TDengine service in foreground (default: false)')
    # parser.add_argument('-l', '--larger-data', action='store_true',                        
    #                     help='Write larger amount of data during write operations (default: false)')
    # parser.add_argument('-p', '--per-thread-db-connection', action='store_true',                        
    #                     help='Use a single shared db connection (default: false)')
    # parser.add_argument('-r', '--record-ops', action='store_true',                        
    #                     help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')                    
    # parser.add_argument('-s', '--max-steps', action='store', default=1000, type=int,
    #                     help='Maximum number of steps to run (default: 100)')
    # parser.add_argument('-t', '--num-threads', action='store', default=5, type=int,
    #                     help='Number of threads to run (default: 10)')
    # parser.add_argument('-x', '--continue-on-exception', action='store_true',                        
    #                     help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')                        

S
Shuduo Sang 已提交
2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812
    parser.add_argument(
        '-a',
        '--auto-start-service',
        action='store_true',
        help='Automatically start/stop the TDengine service (default: false)')
    parser.add_argument(
        '-c',
        '--connector-type',
        action='store',
        default='native',
        type=str,
        help='Connector type to use: native, rest, or mixed (default: 10)')
    parser.add_argument(
        '-d',
        '--debug',
        action='store_true',
        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument(
        '-e',
        '--run-tdengine',
        action='store_true',
        help='Run TDengine service in foreground (default: false)')
    parser.add_argument(
        '-l',
        '--larger-data',
        action='store_true',
        help='Write larger amount of data during write operations (default: false)')
    parser.add_argument(
        '-p',
        '--per-thread-db-connection',
        action='store_true',
        help='Use a single shared db connection (default: false)')
    parser.add_argument(
        '-r',
        '--record-ops',
        action='store_true',
        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
    parser.add_argument(
        '-s',
        '--max-steps',
        action='store',
        default=1000,
        type=int,
        help='Maximum number of steps to run (default: 100)')
    parser.add_argument(
        '-t',
        '--num-threads',
        action='store',
        default=5,
        type=int,
        help='Number of threads to run (default: 10)')
2813 2814 2815 2816 2817
    parser.add_argument(
        '-x',
        '--continue-on-exception',
        action='store_true',
        help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')
2818

2819
    global gConfig
2820
    gConfig = parser.parse_args()
2821

2822
    # Logging Stuff
2823
    global logger
S
Shuduo Sang 已提交
2824 2825
    _logger = logging.getLogger('CrashGen')  # real logger
    _logger.addFilter(LoggingFilter())
2826 2827 2828
    ch = logging.StreamHandler()
    _logger.addHandler(ch)

S
Shuduo Sang 已提交
2829 2830
    # Logging adapter, to be used as a logger
    logger = MyLoggingAdapter(_logger, [])
2831

S
Shuduo Sang 已提交
2832 2833
    if (gConfig.debug):
        logger.setLevel(logging.DEBUG)  # default seems to be INFO
S
Steven Li 已提交
2834 2835
    else:
        logger.setLevel(logging.INFO)
S
Shuduo Sang 已提交
2836

2837 2838
    Dice.seed(0)  # initial seeding of dice

2839
    # Run server or client
2840
    mExec = MainExec()
S
Shuduo Sang 已提交
2841
    if gConfig.run_tdengine:  # run server
2842
        mExec.runService()
S
Shuduo Sang 已提交
2843
    else:
2844
        return mExec.runClient()
2845

S
Shuduo Sang 已提交
2846

2847
if __name__ == "__main__":
S
Steven Li 已提交
2848 2849 2850
    exitCode = main()
    # print("Exiting with code: {}".format(exitCode))
    sys.exit(exitCode)