crash_gen.py 95.5 KB
Newer Older
S
Shuduo Sang 已提交
1
# -----!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
S
Shuduo Sang 已提交
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
# For type hinting before definition, ref:
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations
import taos
import crash_gen
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.log import *
from queue import Queue, Empty
from typing import IO
from typing import Set
from typing import Dict
from typing import List
from requests.auth import HTTPBasicAuth
import textwrap
import datetime
import logging
import time
import random
import threading
import requests
import copy
import argparse
import getopt
39

S
Steven Li 已提交
40
import sys
41
import os
42 43
import io
import signal
44
import traceback
45 46 47 48
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
49

S
Shuduo Sang 已提交
50
# Global variables, tried to keep a small number.
51 52 53

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
S
Shuduo Sang 已提交
54
gConfig = argparse.Namespace()  # Dummy value, will be replaced later
55
logger = None
S
Steven Li 已提交
56

S
Shuduo Sang 已提交
57 58

def runThread(wt: WorkerThread):
59
    wt.run()
60

S
Shuduo Sang 已提交
61

62 63
class CrashGenError(Exception):
    def __init__(self, msg=None, errno=None):
S
Shuduo Sang 已提交
64
        self.msg = msg
65
        self.errno = errno
S
Shuduo Sang 已提交
66

67 68 69
    def __str__(self):
        return self.msg

S
Shuduo Sang 已提交
70

S
Steven Li 已提交
71
class WorkerThread:
S
Shuduo Sang 已提交
72 73 74 75 76
    def __init__(self, pool: ThreadPool, tid,
                 tc: ThreadCoordinator,
                 # te: TaskExecutor,
                 ):  # note: main thread context!
        # self._curStep = -1
77
        self._pool = pool
S
Shuduo Sang 已提交
78 79
        self._tid = tid
        self._tc = tc  # type: ThreadCoordinator
S
Steven Li 已提交
80
        # self.threadIdent = threading.get_ident()
81 82
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()
S
Steven Li 已提交
83

84
        # Let us have a DB connection of our own
S
Shuduo Sang 已提交
85
        if (gConfig.per_thread_db_connection):  # type: ignore
86
            # print("connector_type = {}".format(gConfig.connector_type))
87 88 89 90 91 92 93 94 95 96 97
            if gConfig.connector_type == 'native':
                self._dbConn = DbConn.createNative() 
            elif gConfig.connector_type == 'rest':
                self._dbConn = DbConn.createRest() 
            elif gConfig.connector_type == 'mixed':
                if Dice.throw(2) == 0: # 1/2 chance
                    self._dbConn = DbConn.createNative() 
                else:
                    self._dbConn = DbConn.createRest() 
            else:
                raise RuntimeError("Unexpected connector type: {}".format(gConfig.connector_type))
98

S
Shuduo Sang 已提交
99
        self._dbInUse = False  # if "use db" was executed already
100

101
    def logDebug(self, msg):
S
Steven Li 已提交
102
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
103 104

    def logInfo(self, msg):
S
Steven Li 已提交
105
        logger.info("    TRD[{}] {}".format(self._tid, msg))
106

107 108 109 110
    def dbInUse(self):
        return self._dbInUse

    def useDb(self):
S
Shuduo Sang 已提交
111
        if (not self._dbInUse):
112 113 114
            self.execSql("use db")
        self._dbInUse = True

115
    def getTaskExecutor(self):
S
Shuduo Sang 已提交
116
        return self._tc.getTaskExecutor()
117

S
Steven Li 已提交
118
    def start(self):
119
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
120

S
Shuduo Sang 已提交
121
    def run(self):
S
Steven Li 已提交
122
        # initialization after thread starts, in the thread context
123
        # self.isSleeping = False
124 125
        logger.info("Starting to run thread: {}".format(self._tid))

S
Shuduo Sang 已提交
126
        if (gConfig.per_thread_db_connection):  # type: ignore
127
            logger.debug("Worker thread openning database connection")
128
            self._dbConn.open()
S
Steven Li 已提交
129

S
Shuduo Sang 已提交
130 131
        self._doTaskLoop()

132
        # clean up
S
Shuduo Sang 已提交
133
        if (gConfig.per_thread_db_connection):  # type: ignore
134
            self._dbConn.close()
135

S
Shuduo Sang 已提交
136
    def _doTaskLoop(self):
137 138
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
S
Shuduo Sang 已提交
139 140
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
141
            tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
142
            logger.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid))
143
            self.crossStepGate()   # then per-thread gate, after being tapped
144
            logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
145
            if not self._tc.isRunning():
146
                logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
147 148
                break

149
            # Fetch a task from the Thread Coordinator
150
            logger.debug( "[TRD] Worker thread [{}] about to fetch task".format(self._tid))
151
            task = tc.fetchTask()
152 153

            # Execute such a task
S
Shuduo Sang 已提交
154 155 156
            logger.debug(
                "[TRD] Worker thread [{}] about to execute task: {}".format(
                    self._tid, task.__class__.__name__))
157
            task.execute(self)
158
            tc.saveExecutedTask(task)
159
            logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))
S
Shuduo Sang 已提交
160 161

            self._dbInUse = False  # there may be changes between steps
162

S
Shuduo Sang 已提交
163 164
    def verifyThreadSelf(self):  # ensure we are called by this own thread
        if (threading.get_ident() != self._thread.ident):
S
Steven Li 已提交
165 166
            raise RuntimeError("Unexpectly called from other threads")

S
Shuduo Sang 已提交
167 168
    def verifyThreadMain(self):  # ensure we are called by the main thread
        if (threading.get_ident() != threading.main_thread().ident):
S
Steven Li 已提交
169 170 171
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
S
Shuduo Sang 已提交
172
        if (not self._thread.is_alive()):
S
Steven Li 已提交
173 174
            raise RuntimeError("Unexpected dead thread")

175
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
176 177
    def crossStepGate(self):
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
178 179
        self.verifyThreadSelf()  # only allowed by ourselves

180
        # Wait again at the "gate", waiting to be "tapped"
S
Shuduo Sang 已提交
181 182 183 184
        logger.debug(
            "[TRD] Worker thread {} about to cross the step gate".format(
                self._tid))
        self._stepGate.wait()
185
        self._stepGate.clear()
S
Shuduo Sang 已提交
186

187
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
188

S
Shuduo Sang 已提交
189
    def tapStepGate(self):  # give it a tap, release the thread waiting there
S
Steven Li 已提交
190
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
191 192
        self.verifyThreadMain()  # only allowed for main thread

S
Steven Li 已提交
193
        logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
S
Shuduo Sang 已提交
194 195
        self._stepGate.set()  # wake up!
        time.sleep(0)  # let the released thread run a bit
196

S
Shuduo Sang 已提交
197 198 199
    def execSql(self, sql):  # TODO: expose DbConn directly
        if (gConfig.per_thread_db_connection):
            return self._dbConn.execute(sql)
200
        else:
201
            return self._tc.getDbManager().getDbConn().execute(sql)
202

S
Shuduo Sang 已提交
203 204 205
    def querySql(self, sql):  # TODO: expose DbConn directly
        if (gConfig.per_thread_db_connection):
            return self._dbConn.query(sql)
206 207 208 209
        else:
            return self._tc.getDbManager().getDbConn().query(sql)

    def getQueryResult(self):
S
Shuduo Sang 已提交
210 211
        if (gConfig.per_thread_db_connection):
            return self._dbConn.getQueryResult()
212 213 214
        else:
            return self._tc.getDbManager().getDbConn().getQueryResult()

215
    def getDbConn(self):
S
Shuduo Sang 已提交
216 217
        if (gConfig.per_thread_db_connection):
            return self._dbConn
218
        else:
219
            return self._tc.getDbManager().getDbConn()
220

221 222
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
S
Shuduo Sang 已提交
223
    #         return self._dbConn.query(sql)
224 225
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
226

227
# The coordinator of all worker threads, mostly running in main thread
S
Shuduo Sang 已提交
228 229


230
class ThreadCoordinator:
231
    def __init__(self, pool: ThreadPool, dbManager):
S
Shuduo Sang 已提交
232
        self._curStep = -1  # first step is 0
233
        self._pool = pool
234
        # self._wd = wd
S
Shuduo Sang 已提交
235
        self._te = None  # prepare for every new step
236
        self._dbManager = dbManager
S
Shuduo Sang 已提交
237 238
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things
S
Steven Li 已提交
239

S
Shuduo Sang 已提交
240 241
        self._stepBarrier = threading.Barrier(
            self._pool.numThreads + 1)  # one barrier for all threads
242
        self._execStats = ExecutionStats()
243
        self._runStatus = MainExec.STATUS_RUNNING
S
Steven Li 已提交
244

245 246 247
    def getTaskExecutor(self):
        return self._te

S
Shuduo Sang 已提交
248
    def getDbManager(self) -> DbManager:
249
        return self._dbManager
250

251 252 253
    def crossStepBarrier(self):
        self._stepBarrier.wait()

254 255 256 257
    def requestToStop(self):
        self._runStatus = MainExec.STATUS_STOPPING
        self._execStats.registerFailure("User Interruption")

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
    def _runShouldEnd(self, transitionFailed, hasAbortedTask):
        maxSteps = gConfig.max_steps  # type: ignore
        if self._curStep >= (maxSteps - 1): # maxStep==10, last curStep should be 9
            return True
        if self._runStatus != MainExec.STATUS_RUNNING:
            return True
        if transitionFailed:
            return True
        if hasAbortedTask:
            return True
        return False

    def _hasAbortedTask(self): # from execution of previous step
        for task in self._executedTasks:
            if task.isAborted():
                # print("Task aborted: {}".format(task))
                # hasAbortedTask = True
                return True
        return False

    def _releaseAllWorkerThreads(self, transitionFailed):
        self._curStep += 1  # we are about to get into next step. TODO: race condition here!
        # Now not all threads had time to go to sleep
        logger.debug(
            "--\r\n\n--> Step {} starts with main thread waking up".format(self._curStep))

        # A new TE for the new step
        self._te = None # set to empty first, to signal worker thread to stop
        if not transitionFailed:  # only if not failed
            self._te = TaskExecutor(self._curStep)

        logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(
                self._curStep))  # Now not all threads had time to go to sleep
        # Worker threads will wake up at this point, and each execute it's own task
        self.tapAllThreads() # release all worker thread from their "gate"

    def _syncAtBarrier(self):
         # Now main thread (that's us) is ready to enter a step
        # let other threads go past the pool barrier, but wait at the
        # thread gate
        logger.debug("[TRD] Main thread about to cross the barrier")
        self.crossStepBarrier()
        self._stepBarrier.reset()  # Other worker threads should now be at the "gate"
        logger.debug("[TRD] Main thread finished crossing the barrier")

    def _doTransition(self):
        transitionFailed = False
        try:
            sm = self._dbManager.getStateMachine()
            logger.debug("[STT] starting transitions")
            # at end of step, transiton the DB state
            sm.transition(self._executedTasks)
            logger.debug("[STT] transition ended")
            # Due to limitation (or maybe not) of the Python library,
            # we cannot share connections across threads
            if sm.hasDatabase():
                for t in self._pool.threadList:
                    logger.debug("[DB] use db for all worker threads")
                    t.useDb()
                    # t.execSql("use db") # main thread executing "use
                    # db" on behalf of every worker thread
        except taos.error.ProgrammingError as err:
            if (err.msg == 'network unavailable'):  # broken DB connection
                logger.info("DB connection broken, execution failed")
                traceback.print_stack()
                transitionFailed = True
                self._te = None  # Not running any more
                self._execStats.registerFailure("Broken DB Connection")
                # continue # don't do that, need to tap all threads at
                # end, and maybe signal them to stop
            else:
                raise

        self.resetExecutedTasks()  # clear the tasks after we are done
        # Get ready for next step
        logger.debug("<-- Step {} finished, trasition failed = {}".format(self._curStep, transitionFailed))
        return transitionFailed

S
Shuduo Sang 已提交
336
    def run(self):
337
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
338 339

        # Coordinate all threads step by step
S
Shuduo Sang 已提交
340
        self._curStep = -1  # not started yet
341
        
S
Shuduo Sang 已提交
342
        self._execStats.startExec()  # start the stop watch
343 344
        transitionFailed = False
        hasAbortedTask = False
345 346
        while not self._runShouldEnd(transitionFailed, hasAbortedTask):
            if not gConfig.debug: # print this only if we are not in debug mode                
S
Shuduo Sang 已提交
347
                print(".", end="", flush=True)
348 349
                        
            self._syncAtBarrier() # For now just cross the barrier
350 351

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
S
Shuduo Sang 已提交
352 353
            # We use this period to do house keeping work, when all worker
            # threads are QUIET.
354 355 356
            hasAbortedTask = self._hasAbortedTask() # from previous step
            if hasAbortedTask: 
                logger.info("Aborted task encountered, exiting test program")
357
                self._execStats.registerFailure("Aborted Task Encountered")
358
                break # do transition only if tasks are error free
S
Shuduo Sang 已提交
359

360 361 362 363
            # Ending previous step
            transitionFailed = self._doTransition() # To start, we end step -1 first
            # Then we move on to the next step
            self._releaseAllWorkerThreads(transitionFailed)                    
364

365 366 367 368 369
        if hasAbortedTask or transitionFailed : # abnormal ending, workers waiting at "gate"
            logger.debug("Abnormal ending of main thraed")
        else: # regular ending, workers waiting at "barrier"
            logger.debug("Regular ending, main thread waiting for all worker threads to stop...")
            self._syncAtBarrier()
370

371 372 373
        self._te = None  # No more executor, time to end
        logger.debug("Main thread tapping all threads one last time...")
        self.tapAllThreads()  # Let the threads run one last time
374

375
        logger.debug("\r\n\n--> Main thread ready to finish up...")
376
        logger.debug("Main thread joining all threads")
S
Shuduo Sang 已提交
377
        self._pool.joinAll()  # Get all threads to finish
378
        logger.info("\nAll worker threads finished")
379 380
        self._execStats.endExec()

381 382
    def printStats(self):
        self._execStats.printStats()
S
Steven Li 已提交
383

S
Steven Li 已提交
384 385 386 387 388 389
    def isFailed(self):
        return self._execStats.isFailed()

    def getExecStats(self):
        return self._execStats

S
Shuduo Sang 已提交
390
    def tapAllThreads(self):  # in a deterministic manner
S
Steven Li 已提交
391
        wakeSeq = []
S
Shuduo Sang 已提交
392 393
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
S
Steven Li 已提交
394 395 396
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
S
Shuduo Sang 已提交
397 398 399
        logger.debug(
            "[TRD] Main thread waking up worker threads: {}".format(
                str(wakeSeq)))
400
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
401
        for i in wakeSeq:
S
Shuduo Sang 已提交
402 403 404
            # TODO: maybe a bit too deep?!
            self._pool.threadList[i].tapStepGate()
            time.sleep(0)  # yield
S
Steven Li 已提交
405

406
    def isRunning(self):
S
Shuduo Sang 已提交
407
        return self._te is not None
408

S
Shuduo Sang 已提交
409 410
    def fetchTask(self) -> Task:
        if (not self.isRunning()):  # no task
411
            raise RuntimeError("Cannot fetch task when not running")
412 413
        # return self._wd.pickTask()
        # Alternatively, let's ask the DbState for the appropriate task
414 415 416 417 418 419 420
        # dbState = self.getDbState()
        # tasks = dbState.getTasksAtState() # TODO: create every time?
        # nTasks = len(tasks)
        # i = Dice.throw(nTasks)
        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
        # return tasks[i].clone() # TODO: still necessary?
S
Shuduo Sang 已提交
421 422 423 424 425
        # pick a task type for current state
        taskType = self.getDbManager().getStateMachine().pickTaskType()
        return taskType(
            self.getDbManager(),
            self._execStats)  # create a task from it
426 427

    def resetExecutedTasks(self):
S
Shuduo Sang 已提交
428
        self._executedTasks = []  # should be under single thread
429 430 431 432

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
433 434

# We define a class to run a number of threads in locking steps.
S
Shuduo Sang 已提交
435 436


437
class ThreadPool:
438
    def __init__(self, numThreads, maxSteps):
439 440 441 442
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
S
Shuduo Sang 已提交
443 444
        self.threadList = []  # type: List[WorkerThread]

445
    # starting to run all the threads, in locking steps
446
    def createAndStartThreads(self, tc: ThreadCoordinator):
S
Shuduo Sang 已提交
447 448
        for tid in range(0, self.numThreads):  # Create the threads
            workerThread = WorkerThread(self, tid, tc)
449
            self.threadList.append(workerThread)
S
Shuduo Sang 已提交
450
            workerThread.start()  # start, but should block immediately before step 0
451 452 453 454 455 456

    def joinAll(self):
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

457 458
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Shuduo Sang 已提交
459 460


S
Steven Li 已提交
461 462
class LinearQueue():
    def __init__(self):
463
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
464
        self.lastIndex = 0
S
Shuduo Sang 已提交
465 466
        self._lock = threading.RLock()  # our functions may call each other
        self.inUse = set()  # the indexes that are in use right now
S
Steven Li 已提交
467

468
    def toText(self):
S
Shuduo Sang 已提交
469 470
        return "[{}..{}], in use: {}".format(
            self.firstIndex, self.lastIndex, self.inUse)
471 472

    # Push (add new element, largest) to the tail, and mark it in use
S
Shuduo Sang 已提交
473
    def push(self):
474
        with self._lock:
S
Shuduo Sang 已提交
475 476
            # if ( self.isEmpty() ):
            #     self.lastIndex = self.firstIndex
477
            #     return self.firstIndex
478 479
            # Otherwise we have something
            self.lastIndex += 1
480 481
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
482
            return self.lastIndex
S
Steven Li 已提交
483 484

    def pop(self):
485
        with self._lock:
S
Shuduo Sang 已提交
486 487 488 489
            if (self.isEmpty()):
                # raise RuntimeError("Cannot pop an empty queue")
                return False  # TODO: None?

490
            index = self.firstIndex
S
Shuduo Sang 已提交
491
            if (index in self.inUse):
492 493
                return False

494 495 496 497 498 499 500
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
501
        with self._lock:
502 503 504 505
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
506
    def allocate(self, i):
507
        with self._lock:
508
            # logger.debug("LQ allocating item {}".format(i))
S
Shuduo Sang 已提交
509 510 511
            if (i in self.inUse):
                raise RuntimeError(
                    "Cannot re-use same index in queue: {}".format(i))
512 513
            self.inUse.add(i)

S
Steven Li 已提交
514
    def release(self, i):
515
        with self._lock:
516
            # logger.debug("LQ releasing item {}".format(i))
S
Shuduo Sang 已提交
517
            self.inUse.remove(i)  # KeyError possible, TODO: why?
518 519 520 521

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
522
    def pickAndAllocate(self):
S
Shuduo Sang 已提交
523
        if (self.isEmpty()):
524 525
            return None
        with self._lock:
S
Shuduo Sang 已提交
526
            cnt = 0  # counting the interations
527 528
            while True:
                cnt += 1
S
Shuduo Sang 已提交
529
                if (cnt > self.size() * 10):  # 10x iteration already
530 531
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
S
Shuduo Sang 已提交
532 533
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if (ret not in self.inUse):
534 535 536
                    self.allocate(ret)
                    return ret

S
Shuduo Sang 已提交
537

538
class DbConn:
539
    TYPE_NATIVE = "native-c"
540
    TYPE_REST =   "rest-api"
541 542 543 544 545 546 547 548 549
    TYPE_INVALID = "invalid"

    @classmethod
    def create(cls, connType):
        if connType == cls.TYPE_NATIVE:
            return DbConnNative()
        elif connType == cls.TYPE_REST:
            return DbConnRest()
        else:
S
Shuduo Sang 已提交
550 551
            raise RuntimeError(
                "Unexpected connection type: {}".format(connType))
552 553 554 555 556 557 558 559 560

    @classmethod
    def createNative(cls):
        return cls.create(cls.TYPE_NATIVE)

    @classmethod
    def createRest(cls):
        return cls.create(cls.TYPE_REST)

561 562
    def __init__(self):
        self.isOpen = False
563 564 565
        self._type = self.TYPE_INVALID

    def open(self):
S
Shuduo Sang 已提交
566
        if (self.isOpen):
567 568
            raise RuntimeError("Cannot re-open an existing DB connection")

569 570
        # below implemented by child classes
        self.openByType()
571

S
Shuduo Sang 已提交
572 573 574
        logger.debug(
            "[DB] data connection opened, type = {}".format(
                self._type))
575 576
        self.isOpen = True

S
Shuduo Sang 已提交
577 578 579 580
    def resetDb(self):  # reset the whole database, etc.
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot reset database until connection is open")
581 582
        # self._tdSql.prepare() # Recreate database, etc.

583
        self.execute('drop database if exists db')
584 585
        logger.debug("Resetting DB, dropped database")
        # self._cursor.execute('create database db')
586
        # self._cursor.execute('use db')
587 588
        # tdSql.execute('show databases')

S
Shuduo Sang 已提交
589
    def queryScalar(self, sql) -> int:
590 591
        return self._queryAny(sql)

S
Shuduo Sang 已提交
592
    def queryString(self, sql) -> str:
593 594
        return self._queryAny(sql)

S
Shuduo Sang 已提交
595 596
    def _queryAny(self, sql):  # actual query result as an int
        if (not self.isOpen):
597
            raise RuntimeError("Cannot query database until connection is open")
598
        nRows = self.query(sql)
S
Shuduo Sang 已提交
599
        if nRows != 1:
600
            raise RuntimeError("Unexpected result for query: {}, rows = {}".format(sql, nRows))
601
        if self.getResultRows() != 1 or self.getResultCols() != 1:
602
            raise RuntimeError("Unexpected result set for query: {}".format(sql))
603 604
        return self.getQueryResult()[0][0]

605 606 607 608 609 610 611 612 613
    def use(self, dbName):
        self.execute("use {}".format(dbName))

    def hasDatabases(self):
        return self.query("show databases") > 0

    def hasTables(self):
        return self.query("show tables") > 0

614 615
    def execute(self, sql):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
616

617 618 619
    def query(self, sql) -> int: # return num rows returned
        raise RuntimeError("Unexpected execution, should be overriden")

620 621
    def openByType(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
622

623 624
    def getQueryResult(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
625

626 627
    def getResultRows(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
628

629 630 631 632
    def getResultCols(self):
        raise RuntimeError("Unexpected execution, should be overriden")

# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql
S
Shuduo Sang 已提交
633 634


635 636 637 638
class DbConnRest(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
639
        self._url = "http://localhost:6020/rest/sql"  # fixed for now
640 641
        self._result = None

S
Shuduo Sang 已提交
642 643 644
    def openByType(self):  # Open connection
        pass  # do nothing, always open

645
    def close(self):
S
Shuduo Sang 已提交
646 647 648
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
649 650 651 652 653
        # Do nothing for REST
        logger.debug("[DB] REST Database connection closed")
        self.isOpen = False

    def _doSql(self, sql):
654 655 656 657 658 659 660
        try:
            r = requests.post(self._url, 
                data = sql,
                auth = HTTPBasicAuth('root', 'taosdata'))            
        except:
            print("REST API Failure (TODO: more info here)")
            raise
661 662
        rj = r.json()
        # Sanity check for the "Json Result"
S
Shuduo Sang 已提交
663
        if ('status' not in rj):
664 665
            raise RuntimeError("No status in REST response")

S
Shuduo Sang 已提交
666 667 668 669
        if rj['status'] == 'error':  # clearly reported error
            if ('code' not in rj):  # error without code
                raise RuntimeError("REST error return without code")
            errno = rj['code']  # May need to massage this in the future
670
            # print("Raising programming error with REST return: {}".format(rj))
S
Shuduo Sang 已提交
671 672
            raise taos.error.ProgrammingError(
                rj['desc'], errno)  # todo: check existance of 'desc'
673

S
Shuduo Sang 已提交
674 675 676 677
        if rj['status'] != 'succ':  # better be this
            raise RuntimeError(
                "Unexpected REST return status: {}".format(
                    rj['status']))
678 679

        nRows = rj['rows'] if ('rows' in rj) else 0
S
Shuduo Sang 已提交
680
        self._result = rj
681 682
        return nRows

S
Shuduo Sang 已提交
683 684 685 686
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
687 688
        logger.debug("[SQL-REST] Executing SQL: {}".format(sql))
        nRows = self._doSql(sql)
S
Shuduo Sang 已提交
689 690
        logger.debug(
            "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
691 692
        return nRows

S
Shuduo Sang 已提交
693
    def query(self, sql):  # return rows affected
694 695 696 697 698 699 700 701 702 703 704 705 706
        return self.execute(sql)

    def getQueryResult(self):
        return self._result['data']

    def getResultRows(self):
        print(self._result)
        raise RuntimeError("TBD")
        # return self._tdSql.queryRows

    def getResultCols(self):
        print(self._result)
        raise RuntimeError("TBD")
S
Shuduo Sang 已提交
707

708
    # Duplicate code from TDMySQL, TODO: merge all this into DbConnNative
709 710


711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738
class MyTDSql:
    def __init__(self):
        self.queryRows = 0
        self.queryCols = 0
        self.affectedRows = 0

    def init(self, cursor, log=True):
        self.cursor = cursor
        # if (log):
        #     caller = inspect.getframeinfo(inspect.stack()[1][0])
        #     self.cursor.log(caller.filename + ".sql")

    def close(self):
        self.cursor.close()

    def query(self, sql):
        self.sql = sql
        try:
            self.cursor.execute(sql)
            self.queryResult = self.cursor.fetchall()
            self.queryRows = len(self.queryResult)
            self.queryCols = len(self.cursor.description)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.queryRows
739

740 741 742 743 744 745 746 747 748 749 750
    def execute(self, sql):
        self.sql = sql
        try:
            self.affectedRows = self.cursor.execute(sql)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.affectedRows

S
Shuduo Sang 已提交
751

752
class DbConnNative(DbConn):
753 754 755 756
    # Class variables
    _lock = threading.Lock()
    _connInfoDisplayed = False

757 758
    def __init__(self):
        super().__init__()
759
        self._type = self.TYPE_NATIVE
S
Shuduo Sang 已提交
760
        self._conn = None
761
        self._cursor = None
762
        
S
Shuduo Sang 已提交
763

764 765 766 767 768 769 770 771 772 773 774
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
S
Shuduo Sang 已提交
775
                    buildPath = root[:len(root) - len("/build/bin")]
776 777 778
                    break
        return buildPath

779
    
S
Shuduo Sang 已提交
780
    def openByType(self):  # Open connection
781
        cfgPath = self.getBuildPath() + "/test/cfg"
782
        hostAddr = "127.0.0.1"
783

784 785 786 787 788 789 790 791 792 793
        with self._lock: # force single threading for opening DB connections
            if not self._connInfoDisplayed:
                self.__class__._connInfoDisplayed = True # updating CLASS variable
                logger.info("Initiating TAOS native connection to {}, using config at {}".format(hostAddr, cfgPath))
            
            self._conn = taos.connect(
                host=hostAddr,
                config=cfgPath)  # TODO: make configurable
            self._cursor = self._conn.cursor()
        
794
        self._cursor.execute('reset query cache')
S
Shuduo Sang 已提交
795
        # self._cursor.execute('use db') # do this at the beginning of every
796 797

        # Open connection
798
        self._tdSql = MyTDSql()
799
        self._tdSql.init(self._cursor)
S
Shuduo Sang 已提交
800

801
    def close(self):
S
Shuduo Sang 已提交
802 803 804
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
805
        self._tdSql.close()
806
        logger.debug("[DB] Database connection closed")
807
        self.isOpen = False
S
Steven Li 已提交
808

S
Shuduo Sang 已提交
809 810 811 812
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
813 814
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
S
Shuduo Sang 已提交
815 816 817
        logger.debug(
            "[SQL] Execution Result, nRows = {}, SQL = {}".format(
                nRows, sql))
818
        return nRows
S
Steven Li 已提交
819

S
Shuduo Sang 已提交
820 821 822 823
    def query(self, sql):  # return rows affected
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
824 825
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
S
Shuduo Sang 已提交
826 827 828
        logger.debug(
            "[SQL] Query Result, nRows = {}, SQL = {}".format(
                nRows, sql))
829
        return nRows
830
        # results are in: return self._tdSql.queryResult
831

832 833 834
    def getQueryResult(self):
        return self._tdSql.queryResult

835 836
    def getResultRows(self):
        return self._tdSql.queryRows
837

838 839
    def getResultCols(self):
        return self._tdSql.queryCols
840

S
Shuduo Sang 已提交
841

842
class AnyState:
S
Shuduo Sang 已提交
843 844 845
    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, no even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
846
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
S
Shuduo Sang 已提交
847
    STATE_HAS_DATA = 3  # we have some data in the table
848 849 850 851 852
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
853 854
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
855 856 857 858 859 860 861
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Shuduo Sang 已提交
862 863
        # -1 hack to accomodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]
864 865 866 867

    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
868 869 870 871 872 873
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
S
Shuduo Sang 已提交
874 875 876
            raise RuntimeError(
                "Unexpected comparison, type = {}".format(
                    type(other)))
S
Steven Li 已提交
877

878 879 880
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
881 882 883
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

884 885
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
S
Shuduo Sang 已提交
886

887 888
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
S
Shuduo Sang 已提交
889

890 891
    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]
S
Shuduo Sang 已提交
892

893 894
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
895

896 897
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
898

899 900
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
S
Shuduo Sang 已提交
901

902 903 904 905 906
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
S
Shuduo Sang 已提交
907
        for task in tasks:
908 909 910
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
911
                # task.logDebug("Task success found")
912
                sCnt += 1
S
Shuduo Sang 已提交
913 914 915
                if (sCnt >= 2):
                    raise RuntimeError(
                        "Unexpected more than 1 success with task: {}".format(cls))
916 917 918 919

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
S
Shuduo Sang 已提交
920
        for task in tasks:
921 922
            if not isinstance(task, cls):
                continue
S
Shuduo Sang 已提交
923
            exists = True  # we have a valid instance
924 925
            if task.isSuccess():
                sCnt += 1
S
Shuduo Sang 已提交
926 927 928
        if (exists and sCnt <= 0):
            raise RuntimeError(
                "Unexpected zero success for task: {}".format(cls))
929 930

    def assertNoTask(self, tasks, cls):
S
Shuduo Sang 已提交
931
        for task in tasks:
932
            if isinstance(task, cls):
S
Shuduo Sang 已提交
933 934
                raise CrashGenError(
                    "This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))
935 936

    def assertNoSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
937
        for task in tasks:
938 939
            if isinstance(task, cls):
                if task.isSuccess():
S
Shuduo Sang 已提交
940 941
                    raise RuntimeError(
                        "Unexpected successful task: {}".format(cls))
942 943

    def hasSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
944
        for task in tasks:
945 946 947 948 949 950
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
951
    def hasTask(self, tasks, cls):
S
Shuduo Sang 已提交
952
        for task in tasks:
S
Steven Li 已提交
953 954 955 956
            if isinstance(task, cls):
                return True
        return False

S
Shuduo Sang 已提交
957

958 959 960 961
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
S
Shuduo Sang 已提交
962 963 964
            False, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
965 966 967 968
        ]

    # def verifyTasksToState(self, tasks, newState):

S
Shuduo Sang 已提交
969

970 971 972 973
class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
S
Shuduo Sang 已提交
974 975 976
            True, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
977 978
        ]

S
Shuduo Sang 已提交
979 980
    def verifyTasksToState(self, tasks, newState):
        if (self.hasSuccess(tasks, TaskCreateDb)
981
                ):  # at EMPTY, if there's succes in creating DB
S
Shuduo Sang 已提交
982 983 984 985
            if (not self.hasTask(tasks, TaskDropDb)):  # and no drop_db tasks
                # we must have at most one. TODO: compare numbers
                self.assertAtMostOneSuccess(tasks, TaskCreateDb)

986 987 988 989 990 991 992 993 994 995 996

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
997 998 999
        if (not self.hasTask(tasks, TaskCreateDb)):
            # only if we don't create any more
            self.assertAtMostOneSuccess(tasks, TaskDropDb)
1000
        self.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Steven Li 已提交
1001
        # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not true in massively parrallel cases
1002
        # Nothing to be said about adding data task
1003
        # if ( self.hasSuccess(tasks, DropDbTask) ): # dropped the DB
S
Shuduo Sang 已提交
1004 1005 1006
        # self.assertHasTask(tasks, DropDbTask) # implied by hasSuccess
        # self.assertAtMostOneSuccess(tasks, DropDbTask)
        # self._state = self.STATE_EMPTY
1007 1008
        # if ( self.hasSuccess(tasks, TaskCreateSuperTable) ): # did not drop db, create table success
        #     # self.assertHasTask(tasks, CreateFixedTableTask) # tried to create table
S
Shuduo Sang 已提交
1009
        #     if ( not self.hasTask(tasks, TaskDropSuperTable) ):
1010
        #         self.assertAtMostOneSuccess(tasks, TaskCreateSuperTable) # at most 1 attempt is successful, if we don't drop anything
S
Shuduo Sang 已提交
1011 1012 1013 1014 1015 1016
        # self.assertNoTask(tasks, DropDbTask) # should have have tried
        # if ( not self.hasSuccess(tasks, AddFixedDataTask) ): # just created table, no data yet
        #     # can't say there's add-data attempts, since they may all fail
        #     self._state = self.STATE_TABLE_ONLY
        # else:
        #     self._state = self.STATE_HAS_DATA
1017 1018 1019 1020
        # What about AddFixedData?
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ):
        #     self._state = self.STATE_HAS_DATA
        # else: # no success in dropping db tasks, no success in create fixed table? read data should also fail
S
Shuduo Sang 已提交
1021
        #     # raise RuntimeError("Unexpected no-success scenario")   # We might just landed all failure tasks,
1022 1023
        #     self._state = self.STATE_DB_ONLY  # no change

S
Shuduo Sang 已提交
1024

1025
class StateSuperTableOnly(AnyState):
1026 1027 1028 1029 1030 1031 1032 1033 1034
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1035
        if (self.hasSuccess(tasks, TaskDropSuperTable)
1036
                ):  # we are able to drop the table
1037
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
1038 1039
            # we must have had recreted it
            self.hasSuccess(tasks, TaskCreateSuperTable)
1040

1041
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
1042 1043
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
1044
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
1045 1046 1047
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
1048
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
1049 1050 1051
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
1052

S
Shuduo Sang 已提交
1053

1054 1055 1056 1057 1058 1059 1060 1061 1062 1063
class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1064
        if (newState.equals(AnyState.STATE_EMPTY)):
1065
            self.hasSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1066 1067 1068 1069
            if (not self.hasTask(tasks, TaskCreateDb)):
                self.assertAtMostOneSuccess(tasks, TaskDropDb)  # TODO: dicy
        elif (newState.equals(AnyState.STATE_DB_ONLY)):  # in DB only
            if (not self.hasTask(tasks, TaskCreateDb)
1070
                ):  # without a create_db task
S
Shuduo Sang 已提交
1071 1072
                # we must have drop_db task
                self.assertNoTask(tasks, TaskDropDb)
1073
            self.hasSuccess(tasks, TaskDropSuperTable)
1074
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
1075 1076 1077 1078
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
1079
            # self.hasSuccess(tasks, DeleteDataTasks)
S
Shuduo Sang 已提交
1080 1081
        else:  # should be STATE_HAS_DATA
            if (not self.hasTask(tasks, TaskCreateDb)
1082
                ):  # only if we didn't create one
S
Shuduo Sang 已提交
1083 1084 1085
                # we shouldn't have dropped it
                self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)
1086
                    ):  # if we didn't create the table
S
Shuduo Sang 已提交
1087 1088
                # we should not have a task that drops it
                self.assertNoTask(tasks, TaskDropSuperTable)
1089
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
1090

S
Shuduo Sang 已提交
1091

1092
class StateMechine:
1093 1094
    def __init__(self, dbConn):
        self._dbConn = dbConn
S
Shuduo Sang 已提交
1095 1096 1097 1098 1099
        self._curState = self._findCurrentState()  # starting state
        # transitition target probabilities, indexed with value of STATE_EMPTY,
        # STATE_DB_ONLY, etc.
        self._stateWeights = [1, 3, 5, 15]

1100 1101 1102
    def getCurrentState(self):
        return self._curState

1103 1104 1105
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

1106
    # May be slow, use cautionsly...
S
Shuduo Sang 已提交
1107
    def getTaskTypes(self):  # those that can run (directly/indirectly) from the current state
1108 1109 1110 1111 1112 1113
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

S
Shuduo Sang 已提交
1114
        allTaskClasses = StateTransitionTask.__subclasses__()  # all state transition tasks
1115 1116
        firstTaskTypes = []
        for tc in allTaskClasses:
S
Shuduo Sang 已提交
1117
            # t = tc(self) # create task object
1118 1119
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
S
Shuduo Sang 已提交
1120 1121 1122 1123 1124 1125 1126 1127
        # now we have all the tasks that can begin directly from the current
        # state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy()  # have to have these
        for task1 in firstTaskTypes:  # each task type gathered so far
            endState = task1.getEndState()  # figure the end state
            if endState is None:  # does not change end state
                continue  # no use, do nothing
            for tc in allTaskClasses:  # what task can further begin from there?
1128
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
S
Shuduo Sang 已提交
1129
                    taskTypes.append(tc)  # gather it
1130 1131

        if len(taskTypes) <= 0:
S
Shuduo Sang 已提交
1132 1133 1134 1135 1136 1137 1138
            raise RuntimeError(
                "No suitable task types found for state: {}".format(
                    self._curState))
        logger.debug(
            "[OPS] Tasks found for state {}: {}".format(
                self._curState,
                typesToStrings(taskTypes)))
1139 1140 1141 1142
        return taskTypes

    def _findCurrentState(self):
        dbc = self._dbConn
S
Shuduo Sang 已提交
1143
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
1144 1145
        if not dbc.hasDatabases():  # no database?!
            logger.debug( "[STT] empty database found, between {} and {}".format(ts, time.time()))
1146
            return StateEmpty()
S
Shuduo Sang 已提交
1147 1148
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
1149 1150 1151
        dbc.use("db")
        if not dbc.hasTables():  # no tables
            logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
1152
            return StateDbOnly()
1153 1154 1155 1156

        sTable = DbManager.getFixedSuperTable()
        if sTable.hasRegTables(dbc):  # no regular tables
            logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
1157
            return StateSuperTableOnly()
S
Shuduo Sang 已提交
1158
        else:  # has actual tables
1159
            logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
1160 1161 1162
            return StateHasData()

    def transition(self, tasks):
S
Shuduo Sang 已提交
1163
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
1164
            logger.debug("[STT] Starting State: {}".format(self._curState))
S
Shuduo Sang 已提交
1165
            return  # do nothing
1166

S
Shuduo Sang 已提交
1167 1168
        # this should show up in the server log, separating steps
        self._dbConn.execute("show dnodes")
1169 1170 1171 1172

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
1173 1174
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
            # case of multiple creation and drops
1175 1176 1177

        if self._curState.canDropDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1178 1179
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
            # case of drop-create-drop
1180 1181 1182

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
S
Shuduo Sang 已提交
1183 1184
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not
            # really, in case of create-drop-create
1185 1186 1187

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
S
Shuduo Sang 已提交
1188 1189
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not
            # really in case of drop-create-drop
1190 1191

        # if self._state.canAddData():
S
Shuduo Sang 已提交
1192 1193
        # self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true
        # actually
1194 1195 1196 1197 1198 1199

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
S
Shuduo Sang 已提交
1200 1201
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
1202 1203 1204
        self._curState = newState

    def pickTaskType(self):
S
Shuduo Sang 已提交
1205 1206
        # all the task types we can choose from at curent state
        taskTypes = self.getTaskTypes()
1207 1208 1209
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
S
Shuduo Sang 已提交
1210 1211 1212
            if endState is not None:
                # TODO: change to a method
                weights.append(self._stateWeights[endState.getValIndex()])
1213
            else:
S
Shuduo Sang 已提交
1214 1215
                # read data task, default to 10: TODO: change to a constant
                weights.append(10)
1216
        i = self._weighted_choice_sub(weights)
S
Shuduo Sang 已提交
1217
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
1218 1219
        return taskTypes[i]

S
Shuduo Sang 已提交
1220 1221 1222 1223 1224
    # ref:
    # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
    def _weighted_choice_sub(self, weights):
        # TODO: use our dice to ensure it being determinstic?
        rnd = random.random() * sum(weights)
1225 1226 1227 1228
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
1229

1230
# Manager of the Database Data/Connection
S
Shuduo Sang 已提交
1231 1232 1233 1234


class DbManager():
    def __init__(self, resetDb=True):
S
Steven Li 已提交
1235
        self.tableNumQueue = LinearQueue()
S
Shuduo Sang 已提交
1236 1237 1238
        # datetime.datetime(2019, 1, 1) # initial date time tick
        self._lastTick = self.setupLastTick()
        self._lastInt = 0  # next one is initial integer
1239
        self._lock = threading.RLock()
S
Shuduo Sang 已提交
1240

1241
        # self.openDbServerConnection()
S
Shuduo Sang 已提交
1242 1243
        self._dbConn = DbConn.createNative() if (
            gConfig.connector_type == 'native') else DbConn.createRest()
1244
        try:
S
Shuduo Sang 已提交
1245
            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
1246 1247
        except taos.error.ProgrammingError as err:
            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
S
Shuduo Sang 已提交
1248 1249 1250
            if (err.msg == 'client disconnected'):  # cannot open DB connection
                print(
                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
S
Steven Li 已提交
1251
                sys.exit(2)
1252
            else:
S
Shuduo Sang 已提交
1253 1254
                raise
        except BaseException:
S
Steven Li 已提交
1255
            print("[=] Unexpected exception")
S
Shuduo Sang 已提交
1256 1257 1258 1259
            raise

        if resetDb:
            self._dbConn.resetDb()  # drop and recreate DB
1260

S
Shuduo Sang 已提交
1261 1262
        # Do this after dbConn is in proper shape
        self._stateMachine = StateMechine(self._dbConn)
1263

1264 1265 1266
    def getDbConn(self):
        return self._dbConn

S
Shuduo Sang 已提交
1267
    def getStateMachine(self) -> StateMechine:
1268 1269 1270 1271
        return self._stateMachine

    # def getState(self):
    #     return self._stateMachine.getCurrentState()
1272 1273 1274 1275 1276 1277

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
1278
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
1279
    def setupLastTick(self):
1280
        t1 = datetime.datetime(2020, 6, 1)
1281
        t2 = datetime.datetime.now()
S
Shuduo Sang 已提交
1282 1283 1284 1285
        # maybe a very large number, takes 69 years to exceed Python int range
        elSec = int(t2.timestamp() - t1.timestamp())
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * \
            500  # a number representing seconds within 10 years
1286
        # print("elSec = {}".format(elSec))
S
Shuduo Sang 已提交
1287 1288 1289
        t3 = datetime.datetime(2012, 1, 1)  # default "keep" is 10 years
        t4 = datetime.datetime.fromtimestamp(
            t3.timestamp() + elSec2)  # see explanation above
1290 1291 1292
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

S
Shuduo Sang 已提交
1293
    def pickAndAllocateTable(self):  # pick any table, and "use" it
S
Steven Li 已提交
1294 1295
        return self.tableNumQueue.pickAndAllocate()

1296 1297 1298 1299 1300
    def addTable(self):
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

1301 1302
    @classmethod
    def getFixedSuperTableName(cls):
1303
        return "fs_table"
1304

1305 1306 1307 1308
    @classmethod
    def getFixedSuperTable(cls):
        return TdSuperTable(cls.getFixedSuperTableName())

S
Shuduo Sang 已提交
1309
    def releaseTable(self, i):  # return the table back, so others can use it
S
Steven Li 已提交
1310 1311
        self.tableNumQueue.release(i)

1312
    def getNextTick(self):
S
Shuduo Sang 已提交
1313 1314
        with self._lock:  # prevent duplicate tick
            if Dice.throw(10) == 0:  # 1 in 10 chance
S
Steven Li 已提交
1315
                return self._lastTick + datetime.timedelta(0, -100)
S
Shuduo Sang 已提交
1316 1317 1318
            else:  # regular
                # add one second to it
                self._lastTick += datetime.timedelta(0, 1)
S
Steven Li 已提交
1319
                return self._lastTick
1320 1321

    def getNextInt(self):
1322 1323 1324
        with self._lock:
            self._lastInt += 1
            return self._lastInt
1325 1326

    def getNextBinary(self):
S
Shuduo Sang 已提交
1327 1328
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(
            self.getNextInt())
1329 1330

    def getNextFloat(self):
1331 1332 1333
        ret = 0.9 + self.getNextInt()
        # print("Float obtained: {}".format(ret))
        return ret
S
Shuduo Sang 已提交
1334

S
Steven Li 已提交
1335
    def getTableNameToDelete(self):
S
Shuduo Sang 已提交
1336 1337
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if (not tblNum):  # maybe false
1338
            return False
S
Shuduo Sang 已提交
1339

S
Steven Li 已提交
1340 1341
        return "table_{}".format(tblNum)

1342
    def cleanUp(self):
S
Shuduo Sang 已提交
1343 1344
        self._dbConn.close()

1345

1346
class TaskExecutor():
1347
    class BoundedList:
S
Shuduo Sang 已提交
1348
        def __init__(self, size=10):
1349 1350 1351
            self._size = size
            self._list = []

S
Shuduo Sang 已提交
1352 1353
        def add(self, n: int):
            if not self._list:  # empty
1354 1355 1356 1357 1358 1359 1360
                self._list.append(n)
                return
            # now we should insert
            nItems = len(self._list)
            insPos = 0
            for i in range(nItems):
                insPos = i
S
Shuduo Sang 已提交
1361 1362 1363
                if n <= self._list[i]:  # smaller than this item, time to insert
                    break  # found the insertion point
                insPos += 1  # insert to the right
1364

S
Shuduo Sang 已提交
1365 1366
            if insPos == 0:  # except for the 1st item, # TODO: elimiate first item as gating item
                return  # do nothing
1367 1368

            # print("Inserting at postion {}, value: {}".format(insPos, n))
S
Shuduo Sang 已提交
1369 1370
            self._list.insert(insPos, n)  # insert

1371
            newLen = len(self._list)
S
Shuduo Sang 已提交
1372 1373 1374 1375 1376
            if newLen <= self._size:
                return  # do nothing
            elif newLen == (self._size + 1):
                del self._list[0]  # remove the first item
            else:
1377 1378 1379 1380 1381 1382 1383
                raise RuntimeError("Corrupt Bounded List")

        def __str__(self):
            return repr(self._list)

    _boundedList = BoundedList()

1384 1385 1386
    def __init__(self, curStep):
        self._curStep = curStep

1387 1388 1389 1390
    @classmethod
    def getBoundedList(cls):
        return cls._boundedList

1391 1392 1393
    def getCurStep(self):
        return self._curStep

S
Shuduo Sang 已提交
1394
    def execute(self, task: Task, wt: WorkerThread):  # execute a task on a thread
1395
        task.execute(wt)
1396

1397 1398 1399 1400
    def recordDataMark(self, n: int):
        # print("[{}]".format(n), end="", flush=True)
        self._boundedList.add(n)

1401 1402
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
1403

1404 1405
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
1406

S
Shuduo Sang 已提交
1407

S
Steven Li 已提交
1408
class Task():
1409 1410 1411 1412
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Shuduo Sang 已提交
1413
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
S
Steven Li 已提交
1414 1415
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn
1416

S
Shuduo Sang 已提交
1417
    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
1418
        self._dbManager = dbManager
S
Shuduo Sang 已提交
1419
        self._workerThread = None
1420
        self._err = None
1421
        self._aborted = False
1422
        self._curStep = None
S
Shuduo Sang 已提交
1423
        self._numRows = None  # Number of rows affected
1424

S
Shuduo Sang 已提交
1425
        # Assign an incremental task serial number
1426
        self._taskNum = self.allocTaskNum()
S
Steven Li 已提交
1427
        # logger.debug("Creating new task {}...".format(self._taskNum))
1428

1429
        self._execStats = execStats
S
Shuduo Sang 已提交
1430
        self._lastSql = ""  # last SQL executed/attempted
1431

1432
    def isSuccess(self):
S
Shuduo Sang 已提交
1433
        return self._err is None
1434

1435 1436 1437
    def isAborted(self):
        return self._aborted

S
Shuduo Sang 已提交
1438
    def clone(self):  # TODO: why do we need this again?
1439
        newTask = self.__class__(self._dbManager, self._execStats)
1440 1441 1442
        return newTask

    def logDebug(self, msg):
S
Shuduo Sang 已提交
1443 1444 1445
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1446 1447

    def logInfo(self, msg):
S
Shuduo Sang 已提交
1448 1449 1450
        self._workerThread.logInfo(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1451

1452
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1453 1454 1455
        raise RuntimeError(
            "To be implemeted by child classes, class name: {}".format(
                self.__class__.__name__))
1456

1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480
    def _isErrAcceptable(self, errno, msg):
        if errno in [
                0x05,  # TSDB_CODE_RPC_NOT_READY
                # 0x200, # invalid SQL, TODO: re-examine with TD-934
                0x360, 0x362, 
                0x369, # tag already exists
                0x36A, 0x36B, 0x36D,
                0x381, 
                0x380, # "db not selected"
                0x383,
                0x386,  # DB is being dropped?!
                0x503,
                0x510,  # vnode not in ready state
                0x600,
                1000  # REST catch-all error
            ]: 
            return True # These are the ALWAYS-ACCEPTABLE ones
        elif errno == 0x200 : # invalid SQL, we need to div in a bit more
            if msg.find("invalid column name") != -1:
                return True 
        
        return False # Not an acceptable error


1481 1482
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
S
Shuduo Sang 已提交
1483
        self._workerThread = wt  # type: ignore
1484 1485

        te = wt.getTaskExecutor()
1486
        self._curStep = te.getCurStep()
S
Shuduo Sang 已提交
1487 1488
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))
1489 1490

        self._err = None
S
Shuduo Sang 已提交
1491 1492
        self._execStats.beginTaskType(
            self.__class__.__name__)  # mark beginning
1493
        try:
S
Shuduo Sang 已提交
1494
            self._executeInternal(te, wt)  # TODO: no return value?
1495
        except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
1496 1497
            errno2 = err.errno if (
                err.errno > 0) else 0x80000000 + err.errno  # correct error scheme
1498
            if (gConfig.continue_on_exception):  # user choose to continue
1499
                self.logDebug("[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
1500
                        errno2, err, self._lastSql))
1501
                self._err = err
1502 1503
            elif self._isErrAcceptable(errno2, err.__str__()):
                self.logDebug("[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
S
Shuduo Sang 已提交
1504
                        errno2, err, self._lastSql))
1505
                print("_", end="", flush=True)
S
Shuduo Sang 已提交
1506
                self._err = err
1507
            else: # not an acceptable error
S
Shuduo Sang 已提交
1508 1509
                errMsg = "[=] Unexpected Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                    errno2, err, self._lastSql)
1510
                self.logDebug(errMsg)
S
Shuduo Sang 已提交
1511
                if gConfig.debug:
1512 1513
                    # raise # so that we see full stack
                    traceback.print_exc()
1514 1515
                print(
                    "\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
1516 1517 1518 1519
                    "----------------------------\n")
                # sys.exit(-1)
                self._err = err
                self._aborted = True
S
Shuduo Sang 已提交
1520
        except Exception as e:
S
Steven Li 已提交
1521
            self.logInfo("Non-TAOS exception encountered")
S
Shuduo Sang 已提交
1522
            self._err = e
S
Steven Li 已提交
1523
            self._aborted = True
1524
            traceback.print_exc()
1525
        except BaseException as e:
1526
            self.logInfo("Python base exception encountered")
1527
            self._err = e
1528
            self._aborted = True
S
Steven Li 已提交
1529
            traceback.print_exc()
S
Shuduo Sang 已提交
1530 1531 1532 1533
        except BaseException:
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
                    self._lastSql))
1534
            raise
1535
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
S
Shuduo Sang 已提交
1536 1537 1538 1539 1540

        self.logDebug("[X] task execution completed, {}, status: {}".format(
            self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        # TODO: merge with above.
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess())
S
Steven Li 已提交
1541

1542
    def execSql(self, sql):
1543
        self._lastSql = sql
1544
        return self._dbManager.execute(sql)
1545

S
Shuduo Sang 已提交
1546
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1547 1548 1549
        self._lastSql = sql
        return wt.execSql(sql)

S
Shuduo Sang 已提交
1550
    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1551 1552 1553
        self._lastSql = sql
        return wt.querySql(sql)

S
Shuduo Sang 已提交
1554
    def getQueryResult(self, wt: WorkerThread):  # execute an SQL on the worker thread
1555 1556 1557
        return wt.getQueryResult()


1558
class ExecutionStats:
1559
    def __init__(self):
S
Shuduo Sang 已提交
1560 1561
        # total/success times for a task
        self._execTimes: Dict[str, [int, int]] = {}
1562 1563 1564
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1565
        self._execStartTime = None
S
Shuduo Sang 已提交
1566 1567
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time
1568

1569 1570 1571
        self._failed = False
        self._failureReason = None

S
Steven Li 已提交
1572
    def __str__(self):
S
Shuduo Sang 已提交
1573 1574
        return "[ExecStats: _failed={}, _failureReason={}".format(
            self._failed, self._failureReason)
S
Steven Li 已提交
1575 1576

    def isFailed(self):
S
Shuduo Sang 已提交
1577
        return self._failed
S
Steven Li 已提交
1578

1579 1580 1581 1582 1583 1584
    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

S
Shuduo Sang 已提交
1585
    def incExecCount(self, klassName, isSuccess):  # TODO: add a lock here
1586 1587
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
S
Shuduo Sang 已提交
1588 1589
        t = self._execTimes[klassName]  # tuple for the data
        t[0] += 1  # index 0 has the "total" execution times
1590
        if isSuccess:
S
Shuduo Sang 已提交
1591
            t[1] += 1  # index 1 has the "success" execution times
1592 1593 1594

    def beginTaskType(self, klassName):
        with self._lock:
S
Shuduo Sang 已提交
1595 1596
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
1597 1598 1599 1600 1601
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
S
Shuduo Sang 已提交
1602
            if self._tasksInProgress == 0:  # all tasks have stopped
1603 1604 1605
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1606 1607 1608 1609
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1610
    def printStats(self):
S
Shuduo Sang 已提交
1611 1612 1613 1614 1615 1616
        logger.info(
            "----------------------------------------------------------------------")
        logger.info(
            "| Crash_Gen test {}, with the following stats:". format(
                "FAILED (reason: {})".format(
                    self._failureReason) if self._failed else "SUCCEEDED"))
1617
        logger.info("| Task Execution Times (success/total):")
1618
        execTimesAny = 0
S
Shuduo Sang 已提交
1619
        for k, n in self._execTimes.items():
1620
            execTimesAny += n[0]
S
Shuduo Sang 已提交
1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640
            logger.info("|    {0:<24}: {1}/{2}".format(k, n[1], n[0]))

        logger.info(
            "| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info(
            "| Total Tasks In Progress at End: {}".format(
                self._tasksInProgress))
        logger.info(
            "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(
                self._accRunTime))
        logger.info(
            "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny))
        logger.info(
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
        logger.info(
            "| Top numbers written: {}".format(
                TaskExecutor.getBoundedList()))
        logger.info(
            "----------------------------------------------------------------------")
1641 1642 1643


class StateTransitionTask(Task):
1644 1645 1646 1647 1648
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1649
    @classmethod
S
Shuduo Sang 已提交
1650
    def getInfo(cls):  # each sub class should supply their own information
1651 1652
        raise RuntimeError("Overriding method expected")

S
Shuduo Sang 已提交
1653
    _endState = None
1654
    @classmethod
S
Shuduo Sang 已提交
1655
    def getEndState(cls):  # TODO: optimize by calling it fewer times
1656 1657
        raise RuntimeError("Overriding method expected")

1658 1659 1660
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1661

1662 1663 1664
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1665 1666

    @classmethod
1667 1668 1669
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1670

1671 1672 1673 1674
    @classmethod
    def getRegTableName(cls, i):
        return "db.reg_table_{}".format(i)

1675 1676
    def execute(self, wt: WorkerThread):
        super().execute(wt)
S
Shuduo Sang 已提交
1677 1678


1679
class TaskCreateDb(StateTransitionTask):
1680
    @classmethod
1681
    def getEndState(cls):
S
Shuduo Sang 已提交
1682
        return StateDbOnly()
1683

1684 1685 1686 1687
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1688
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1689 1690
        self.execWtSql(wt, "create database db")

1691

1692
class TaskDropDb(StateTransitionTask):
1693
    @classmethod
1694 1695
    def getEndState(cls):
        return StateEmpty()
1696

1697 1698 1699 1700
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1701
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1702
        self.execWtSql(wt, "drop database db")
S
Steven Li 已提交
1703
        logger.debug("[OPS] database dropped at {}".format(time.time()))
1704

S
Shuduo Sang 已提交
1705

1706
class TaskCreateSuperTable(StateTransitionTask):
1707
    @classmethod
1708 1709
    def getEndState(cls):
        return StateSuperTableOnly()
1710

1711 1712
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1713
        return state.canCreateFixedSuperTable()
1714

1715
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1716
        if not wt.dbInUse():  # no DB yet, to the best of our knowledge
1717 1718 1719
            logger.debug("Skipping task, no DB yet")
            return

S
Shuduo Sang 已提交
1720
        tblName = self._dbManager.getFixedSuperTableName()
1721
        # wt.execSql("use db")    # should always be in place
S
Shuduo Sang 已提交
1722 1723 1724 1725 1726
        self.execWtSql(
            wt,
            "create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
        # No need to create the regular tables, INSERT will do that
        # automatically
1727

S
Steven Li 已提交
1728

1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756
class TdSuperTable:
    def __init__(self, stName):
        self._stName = stName

    def getRegTables(self, dbc: DbConn):
        try:
            dbc.query("select TBNAME from db.{}".format(self._stName))  # TODO: analyze result set later            
        except taos.error.ProgrammingError as err:                    
            errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno
            logger.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err))
            raise

        qr = dbc.getQueryResult()
        return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation

    def hasRegTables(self, dbc: DbConn):
        return dbc.query("SELECT * FROM db.{}".format(self._stName)) > 0

    def ensureTable(self, dbc: DbConn, regTableName: str):
        sql = "select tbname from {} where tbname in ('{}')".format(self._stName, regTableName)
        if dbc.query(sql) >= 1 : # reg table exists already
            return
        sql = "CREATE TABLE {} USING {} tags ('{}', {})".format(
            regTableName, self._stName,
            'xyz', '33'
        )
        dbc.execute(sql)

1757
class TaskReadData(StateTransitionTask):
1758
    @classmethod
1759
    def getEndState(cls):
S
Shuduo Sang 已提交
1760
        return None  # meaning doesn't affect state
1761

1762 1763 1764 1765
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1766
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1767
        sTable = self._dbManager.getFixedSuperTable()
1768

S
Shuduo Sang 已提交
1769 1770
        if random.randrange(
                5) == 0:  # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
1771 1772
            wt.getDbConn().close()
            wt.getDbConn().open()
1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784
        
        for rTbName in sTable.getRegTables(self._dbManager.getDbConn()):  # regular tables
            aggExpr = Dice.choice(['*', 'count(*)', 'avg(speed)', 
                # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable
                'sum(speed)', 'stddev(speed)', 
                'min(speed)', 'max(speed)', 'first(speed)', 'last(speed)']) # TODO: add more from 'top'
            try:
                self.execWtSql(wt, "select {} from db.{}".format(aggExpr, rTbName))
            except taos.error.ProgrammingError as err:                    
                errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno
                logger.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, self._lastSql))
                raise
S
Shuduo Sang 已提交
1785

1786
class TaskDropSuperTable(StateTransitionTask):
1787
    @classmethod
1788
    def getEndState(cls):
S
Shuduo Sang 已提交
1789
        return StateDbOnly()
1790

1791 1792
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1793
        return state.canDropFixedSuperTable()
1794

1795
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1796 1797 1798 1799 1800 1801 1802
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized
        # sequence
        if Dice.throw(2) == 0:
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
1803
            isSuccess = True
S
Shuduo Sang 已提交
1804 1805 1806
            for i in tblSeq:
                regTableName = self.getRegTableName(
                    i)  # "db.reg_table_{}".format(i)
1807
                try:
S
Shuduo Sang 已提交
1808 1809 1810 1811 1812 1813 1814
                    self.execWtSql(wt, "drop table {}".format(
                        regTableName))  # nRows always 0, like MySQL
                except taos.error.ProgrammingError as err:
                    # correcting for strange error number scheme
                    errno2 = err.errno if (
                        err.errno > 0) else 0x80000000 + err.errno
                    if (errno2 in [0x362]):  # mnode invalid table name
1815
                        isSuccess = False
S
Shuduo Sang 已提交
1816 1817 1818
                        logger.debug(
                            "[DB] Acceptable error when dropping a table")
                    continue  # try to delete next regular table
1819 1820

                if (not tickOutput):
S
Shuduo Sang 已提交
1821 1822
                    tickOutput = True  # Print only one time
                    if isSuccess:
1823 1824
                        print("d", end="", flush=True)
                    else:
S
Shuduo Sang 已提交
1825
                        print("f", end="", flush=True)
1826 1827

        # Drop the super table itself
S
Shuduo Sang 已提交
1828
        tblName = self._dbManager.getFixedSuperTableName()
1829
        self.execWtSql(wt, "drop table db.{}".format(tblName))
1830

S
Shuduo Sang 已提交
1831

1832 1833 1834
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
S
Shuduo Sang 已提交
1835
        return None  # meaning doesn't affect state
1836 1837 1838

    @classmethod
    def canBeginFrom(cls, state: AnyState):
S
Shuduo Sang 已提交
1839
        return state.canDropFixedSuperTable()  # if we can drop it, we can alter tags
1840 1841

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1842
        tblName = self._dbManager.getFixedSuperTableName()
1843
        dice = Dice.throw(4)
S
Shuduo Sang 已提交
1844
        if dice == 0:
1845
            sql = "alter table db.{} add tag extraTag int".format(tblName)
S
Shuduo Sang 已提交
1846
        elif dice == 1:
1847
            sql = "alter table db.{} drop tag extraTag".format(tblName)
S
Shuduo Sang 已提交
1848
        elif dice == 2:
1849
            sql = "alter table db.{} drop tag newTag".format(tblName)
S
Shuduo Sang 已提交
1850 1851 1852
        else:  # dice == 3
            sql = "alter table db.{} change tag extraTag newTag".format(
                tblName)
1853 1854

        self.execWtSql(wt, sql)
1855

S
Shuduo Sang 已提交
1856

1857
class TaskAddData(StateTransitionTask):
S
Shuduo Sang 已提交
1858 1859
    # Track which table is being actively worked on
    activeTable: Set[int] = set()
1860

S
Shuduo Sang 已提交
1861 1862
    # We use these two files to record operations to DB, useful for power-off
    # tests
1863 1864 1865 1866 1867
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
S
Shuduo Sang 已提交
1868 1869 1870 1871
        if gConfig.record_ops:
            if (cls.fAddLogReady is None):
                logger.info(
                    "Recording in a file operations to be performed...")
1872
                cls.fAddLogReady = open("add_log_ready.txt", "w")
S
Shuduo Sang 已提交
1873
            if (cls.fAddLogDone is None):
1874 1875
                logger.info("Recording in a file operations completed...")
                cls.fAddLogDone = open("add_log_done.txt", "w")
1876

1877
    @classmethod
1878 1879
    def getEndState(cls):
        return StateHasData()
1880 1881 1882 1883

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
S
Shuduo Sang 已提交
1884

1885
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1886
        ds = self._dbManager
1887
        tblSeq = list(range(
S
Shuduo Sang 已提交
1888 1889 1890 1891
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
        for i in tblSeq:
            if (i in self.activeTable):  # wow already active
1892
                print("x", end="", flush=True) # concurrent insertion
1893
            else:
S
Shuduo Sang 已提交
1894
                self.activeTable.add(i)  # marking it active
1895 1896 1897 1898 1899 1900
            
            sTable = ds.getFixedSuperTable()
            regTableName = self.getRegTableName(i)  # "db.reg_table_{}".format(i)
            sTable.ensureTable(ds.getDbConn(), regTableName)  # Ensure the table exists           
           
            for j in range(self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
S
Shuduo Sang 已提交
1901
                nextInt = ds.getNextInt()
1902 1903
                if gConfig.record_ops:
                    self.prepToRecordOps()
1904
                    self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName))
1905 1906
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
1907
                sql = "insert into {} values ('{}', {});".format( # removed: tags ('{}', {})
S
Shuduo Sang 已提交
1908
                    regTableName,
1909 1910
                    # ds.getFixedSuperTableName(),
                    # ds.getNextBinary(), ds.getNextFloat(),
1911
                    ds.getNextTick(), nextInt)
S
Shuduo Sang 已提交
1912 1913 1914
                self.execWtSql(wt, sql)
                # Successfully wrote the data into the DB, let's record it
                # somehow
1915
                te.recordDataMark(nextInt)
1916
                if gConfig.record_ops:
S
Shuduo Sang 已提交
1917 1918 1919
                    self.fAddLogDone.write(
                        "Wrote {} to {}\n".format(
                            nextInt, regTableName))
1920 1921
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
S
Shuduo Sang 已提交
1922
            self.activeTable.discard(i)  # not raising an error, unlike remove
1923 1924


S
Steven Li 已提交
1925 1926
# Deterministic random number generator
class Dice():
S
Shuduo Sang 已提交
1927
    seeded = False  # static, uninitialized
S
Steven Li 已提交
1928 1929

    @classmethod
S
Shuduo Sang 已提交
1930
    def seed(cls, s):  # static
S
Steven Li 已提交
1931
        if (cls.seeded):
S
Shuduo Sang 已提交
1932 1933
            raise RuntimeError(
                "Cannot seed the random generator more than once")
S
Steven Li 已提交
1934 1935 1936 1937 1938
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
S
Shuduo Sang 已提交
1939
    def verifyRNG(cls):  # Verify that the RNG is determinstic
S
Steven Li 已提交
1940 1941 1942 1943
        random.seed(0)
        x1 = random.randrange(0, 1000)
        x2 = random.randrange(0, 1000)
        x3 = random.randrange(0, 1000)
S
Shuduo Sang 已提交
1944
        if (x1 != 864 or x2 != 394 or x3 != 776):
S
Steven Li 已提交
1945 1946 1947
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
S
Shuduo Sang 已提交
1948
    def throw(cls, stop):  # get 0 to stop-1
1949
        return cls.throwRange(0, stop)
S
Steven Li 已提交
1950 1951

    @classmethod
S
Shuduo Sang 已提交
1952 1953
    def throwRange(cls, start, stop):  # up to stop-1
        if (not cls.seeded):
S
Steven Li 已提交
1954
            raise RuntimeError("Cannot throw dice before seeding it")
1955
        return random.randrange(start, stop)
S
Steven Li 已提交
1956

1957 1958 1959 1960
    @classmethod
    def choice(cls, cList):
        return random.choice(cList)

S
Steven Li 已提交
1961

S
Steven Li 已提交
1962 1963
class LoggingFilter(logging.Filter):
    def filter(self, record: logging.LogRecord):
S
Shuduo Sang 已提交
1964 1965
        if (record.levelno >= logging.INFO):
            return True  # info or above always log
S
Steven Li 已提交
1966

S
Steven Li 已提交
1967 1968 1969 1970
        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
S
Steven Li 已提交
1971 1972
        return True

S
Shuduo Sang 已提交
1973 1974

class MyLoggingAdapter(logging.LoggerAdapter):
1975 1976 1977 1978
    def process(self, msg, kwargs):
        return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
        # return '[%s] %s' % (self.extra['connid'], msg), kwargs

S
Shuduo Sang 已提交
1979 1980

class SvcManager:
1981
    def __init__(self):
1982
        print("Starting TDengine Service Manager")
1983 1984
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT, self.sigIntHandler)
1985
        signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
1986

1987
        self.inSigHandler = False
1988 1989
        # self._status = MainExec.STATUS_RUNNING # set inside
        # _startTaosService()
1990
        self.svcMgrThread = None
1991

1992 1993 1994 1995 1996 1997 1998 1999
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Service Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Restart")
            # Remember to update the if range below
S
Shuduo Sang 已提交
2000
            # print("Enter Choice: ", end="", flush=True)
2001 2002 2003
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
S
Shuduo Sang 已提交
2004 2005 2006
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
2007
            print("Invalid choice, please try again.")
S
Shuduo Sang 已提交
2008
            choice = ""  # reset
2009 2010
        return choice

S
Shuduo Sang 已提交
2011
    def sigUsrHandler(self, signalNumber, frame):
2012
        print("Interrupting main thread execution upon SIGUSR1")
2013
        if self.inSigHandler:  # already
2014
            print("Ignoring repeated SIG...")
S
Shuduo Sang 已提交
2015
            return  # do nothing if it's already not running
2016
        self.inSigHandler = True
2017 2018

        choice = self._doMenu()
S
Shuduo Sang 已提交
2019 2020 2021 2022 2023
        if choice == "1":
            # TODO: can the sub-process be blocked due to us not reading from
            # queue?
            self.sigHandlerResume()
        elif choice == "2":
2024
            self.stopTaosService()
S
Shuduo Sang 已提交
2025
        elif choice == "3":
2026 2027
            self.stopTaosService()
            self.startTaosService()
2028 2029
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))
2030

2031 2032
        self.inSigHandler = False

2033 2034
    def sigIntHandler(self, signalNumber, frame):
        print("Sig INT Handler starting...")
2035
        if self.inSigHandler:
2036 2037
            print("Ignoring repeated SIG_INT...")
            return
2038
        self.inSigHandler = True
2039

S
Shuduo Sang 已提交
2040 2041
        self.stopTaosService()
        print("INT signal handler returning...")
2042
        self.inSigHandler = False
2043

S
Shuduo Sang 已提交
2044
    def sigHandlerResume(self):
2045
        print("Resuming TDengine service manager thread (main thread)...\n\n")
2046

2047
    def _checkServiceManagerThread(self):
2048 2049 2050 2051
        if self.svcMgrThread:  # valid svc mgr thread
            if self.svcMgrThread.isStopped():  # done?
                self.svcMgrThread.procIpcBatch()  # one last time. TODO: appropriate?
                self.svcMgrThread = None  # no more
2052 2053

    def _procIpcAll(self):
2054 2055 2056 2057 2058 2059 2060 2061
        while self.svcMgrThread:  # for as long as the svc mgr thread is still here
            self.svcMgrThread.procIpcBatch()  # regular processing,
            time.sleep(0.5)  # pause, before next round
            self._checkServiceManagerThread()
        print(
            "Service Manager Thread (with subprocess) has ended, main thread now exiting...")

    def startTaosService(self):
2062
        if self.svcMgrThread:
2063 2064 2065
            raise RuntimeError(
                "Cannot start TAOS service when one may already be running")
        self.svcMgrThread = ServiceManagerThread()  # create the object
2066 2067
        self.svcMgrThread.start()
        print("TAOS service started, printing out output...")
2068 2069 2070
        self.svcMgrThread.procIpcBatch(
            trimToTarget=10,
            forceOutput=True)  # for printing 10 lines
2071
        print("TAOS service started")
2072 2073

    def stopTaosService(self, outputLines=20):
2074 2075 2076 2077 2078
        print("Terminating Service Manager Thread (SMT) execution...")
        if not self.svcMgrThread:
            raise RuntimeError("Unexpected empty svc mgr thread")
        self.svcMgrThread.stop()
        if self.svcMgrThread.isStopped():
2079 2080
            self.svcMgrThread.procIpcBatch(outputLines)  # one last time
            self.svcMgrThread = None
2081 2082 2083 2084 2085 2086 2087
            print("----- End of TDengine Service Output -----\n")
            print("SMT execution terminated")
        else:
            print("WARNING: SMT did not terminate as expected")

    def run(self):
        self.startTaosService()
2088 2089 2090 2091 2092
        self._procIpcAll()  # pump/process all the messages
        if self.svcMgrThread:  # if sig handler hasn't destroyed it by now
            self.stopTaosService()  # should have started already


2093 2094 2095 2096 2097
class ServiceManagerThread:
    MAX_QUEUE_SIZE = 10000

    def __init__(self):
        self._tdeSubProcess = None
2098
        self._thread = None
2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115
        self._status = None

    def getStatus(self):
        return self._status

    def isRunning(self):
        # return self._thread and self._thread.is_alive()
        return self._status == MainExec.STATUS_RUNNING

    def isStopping(self):
        return self._status == MainExec.STATUS_STOPPING

    def isStopped(self):
        return self._status == MainExec.STATUS_STOPPED

    # Start the thread (with sub process), and wait for the sub service
    # to become fully operational
2116 2117
    def start(self):
        if self._thread:
2118
            raise RuntimeError("Unexpected _thread")
2119
        if self._tdeSubProcess:
2120 2121 2122 2123
            raise RuntimeError("TDengine sub process already created/running")

        self._status = MainExec.STATUS_STARTING

2124
        self._tdeSubProcess = TdeSubProcess()
2125 2126 2127 2128
        self._tdeSubProcess.start()

        self._ipcQueue = Queue()
        self._thread = threading.Thread(
2129
            target=self.svcOutputReader,
2130
            args=(self._tdeSubProcess.getStdOut(), self._ipcQueue))
2131
        self._thread.daemon = True  # thread dies with the program
2132 2133 2134
        self._thread.start()

        # wait for service to start
2135
        for i in range(0, 10):
2136 2137 2138
            time.sleep(1.0)
            # self.procIpcBatch() # don't pump message during start up
            print("_zz_", end="", flush=True)
2139
            if self._status == MainExec.STATUS_RUNNING:
2140
                logger.info("[] TDengine service READY to process requests")
2141 2142 2143
                return  # now we've started
        # TODO: handle this better?
        raise RuntimeError("TDengine service did not start successfully")
2144 2145 2146 2147 2148 2149

    def stop(self):
        # can be called from both main thread or signal handler
        print("Terminating TDengine service running as the sub process...")
        if self.isStopped():
            print("Service already stopped")
2150
            return
2151 2152 2153
        if self.isStopping():
            print("Service is already being stopped")
            return
2154 2155 2156 2157
        # Linux will send Control-C generated SIGINT to the TDengine process
        # already, ref:
        # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes
        if not self._tdeSubProcess:
2158
            raise RuntimeError("sub process object missing")
2159

2160 2161 2162
        self._status = MainExec.STATUS_STOPPING
        self._tdeSubProcess.stop()

2163 2164 2165 2166
        if self._tdeSubProcess.isRunning():  # still running
            print(
                "FAILED to stop sub process, it is still running... pid = {}".format(
                    self.subProcess.pid))
2167
        else:
2168 2169 2170
            self._tdeSubProcess = None  # not running any more
            self.join()  # stop the thread, change the status, etc.

2171 2172 2173
    def join(self):
        # TODO: sanity check
        if not self.isStopping():
2174 2175 2176
            raise RuntimeError(
                "Unexpected status when ending svc mgr thread: {}".format(
                    self._status))
2177

2178
        if self._thread:
2179
            self._thread.join()
2180
            self._thread = None
2181
            self._status = MainExec.STATUS_STOPPED
S
Shuduo Sang 已提交
2182
        else:
2183
            print("Joining empty thread, doing nothing")
2184 2185 2186

    def _trimQueue(self, targetSize):
        if targetSize <= 0:
2187
            return  # do nothing
2188
        q = self._ipcQueue
2189
        if (q.qsize() <= targetSize):  # no need to trim
2190 2191 2192 2193
            return

        logger.debug("Triming IPC queue to target size: {}".format(targetSize))
        itemsToTrim = q.qsize() - targetSize
2194
        for i in range(0, itemsToTrim):
2195 2196 2197
            try:
                q.get_nowait()
            except Empty:
2198 2199
                break  # break out of for loop, no more trimming

2200
    TD_READY_MSG = "TDengine is initialized successfully"
S
Shuduo Sang 已提交
2201

2202 2203
    def procIpcBatch(self, trimToTarget=0, forceOutput=False):
        self._trimQueue(trimToTarget)  # trim if necessary
S
Shuduo Sang 已提交
2204 2205
        # Process all the output generated by the underlying sub process,
        # managed by IO thread
2206
        print("<", end="", flush=True)
S
Shuduo Sang 已提交
2207 2208
        while True:
            try:
2209
                line = self._ipcQueue.get_nowait()  # getting output at fast speed
2210
                self._printProgress("_o")
2211 2212 2213
            except Empty:
                # time.sleep(2.3) # wait only if there's no output
                # no more output
2214
                print(".>", end="", flush=True)
S
Shuduo Sang 已提交
2215
                return  # we are done with THIS BATCH
2216
            else:  # got line, printing out
2217 2218 2219 2220 2221 2222 2223
                if forceOutput:
                    logger.info(line)
                else:
                    logger.debug(line)
        print(">", end="", flush=True)

    _ProgressBars = ["--", "//", "||", "\\\\"]
2224

2225 2226
    def _printProgress(self, msg):  # TODO: assuming 2 chars
        print(msg, end="", flush=True)
2227 2228 2229
        pBar = self._ProgressBars[Dice.throw(4)]
        print(pBar, end="", flush=True)
        print('\b\b\b\b', end="", flush=True)
2230

2231 2232 2233
    def svcOutputReader(self, out: IO, queue):
        # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # print("This is the svcOutput Reader...")
2234
        # for line in out :
2235 2236 2237 2238
        for line in iter(out.readline, b''):
            # print("Finished reading a line: {}".format(line))
            # print("Adding item to queue...")
            line = line.decode("utf-8").rstrip()
2239 2240
            # This might block, and then causing "out" buffer to block
            queue.put(line)
2241 2242
            self._printProgress("_i")

2243 2244 2245
            if self._status == MainExec.STATUS_STARTING:  # we are starting, let's see if we have started
                if line.find(self.TD_READY_MSG) != -1:  # found
                    self._status = MainExec.STATUS_RUNNING
2246 2247

            # Trim the queue if necessary: TODO: try this 1 out of 10 times
2248
            self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10)  # trim to 90% size
2249

2250 2251 2252
            if self.isStopping():  # TODO: use thread status instead
                # WAITING for stopping sub process to finish its outptu
                print("_w", end="", flush=True)
2253 2254

            # queue.put(line)
2255 2256
        # meaning sub process must have died
        print("\nNo more output from IO thread managing TDengine service")
2257 2258
        out.close()

2259 2260

class TdeSubProcess:
2261 2262 2263 2264 2265
    def __init__(self):
        self.subProcess = None

    def getStdOut(self):
        return self.subProcess.stdout
2266

2267
    def isRunning(self):
2268
        return self.subProcess is not None
2269

S
Shuduo Sang 已提交
2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath
2284

2285
    def start(self):
2286
        ON_POSIX = 'posix' in sys.builtin_module_names
S
Shuduo Sang 已提交
2287

2288 2289 2290
        taosdPath = self.getBuildPath() + "/build/bin/taosd"
        cfgPath = self.getBuildPath() + "/test/cfg"

S
Shuduo Sang 已提交
2291
        svcCmd = [taosdPath, '-c', cfgPath]
2292
        # svcCmd = ['vmstat', '1']
S
Shuduo Sang 已提交
2293
        if self.subProcess:  # already there
2294 2295 2296
            raise RuntimeError("Corrupt process state")

        self.subProcess = subprocess.Popen(
S
Shuduo Sang 已提交
2297 2298
            svcCmd,
            stdout=subprocess.PIPE,
2299
            # bufsize=1, # not supported in binary mode
S
Shuduo Sang 已提交
2300
            close_fds=ON_POSIX)  # had text=True, which interferred with reading EOF
2301

2302 2303 2304
    def stop(self):
        if not self.subProcess:
            print("Sub process already stopped")
2305 2306 2307
            return

        retCode = self.subProcess.poll()
S
Shuduo Sang 已提交
2308
        if retCode:  # valid return code, process ended
2309
            self.subProcess = None
S
Shuduo Sang 已提交
2310 2311
        else:  # process still alive, let's interrupt it
            print(
2312
                "Sub process is running, sending SIG_INT and waiting for it to terminate...")
S
Shuduo Sang 已提交
2313 2314 2315 2316
            # sub process should end, then IPC queue should end, causing IO
            # thread to end
            self.subProcess.send_signal(signal.SIGINT)
            try:
2317 2318 2319 2320
                self.subProcess.wait(10)
            except subprocess.TimeoutExpired as err:
                print("Time out waiting for TDengine service process to exit")
            else:
2321
                print("TDengine service process terminated successfully from SIG_INT")
2322 2323
                self.subProcess = None

S
Shuduo Sang 已提交
2324

2325 2326 2327 2328 2329 2330
class ClientManager:
    def __init__(self):
        print("Starting service manager")
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT, self.sigIntHandler)

2331
        self._status = MainExec.STATUS_RUNNING
2332 2333 2334
        self.tc = None

    def sigIntHandler(self, signalNumber, frame):
2335
        if self._status != MainExec.STATUS_RUNNING:
2336 2337 2338
            print("Repeated SIGINT received, forced exit...")
            # return  # do nothing if it's already not running
            sys.exit(-1)
2339
        self._status = MainExec.STATUS_STOPPING  # immediately set our status
2340 2341 2342 2343

        print("Terminating program...")
        self.tc.requestToStop()

S
Shuduo Sang 已提交
2344
    def _printLastNumbers(self):  # to verify data durability
2345 2346
        dbManager = DbManager(resetDb=False)
        dbc = dbManager.getDbConn()
S
Shuduo Sang 已提交
2347
        if dbc.query("show databases") == 0:  # no databae
2348
            return
S
Shuduo Sang 已提交
2349
        if dbc.query("show tables") == 0:  # no tables
S
Steven Li 已提交
2350
            return
2351 2352

        dbc.execute("use db")
S
Shuduo Sang 已提交
2353
        sTbName = dbManager.getFixedSuperTableName()
2354 2355

        # get all regular tables
S
Shuduo Sang 已提交
2356 2357
        # TODO: analyze result set later
        dbc.query("select TBNAME from db.{}".format(sTbName))
2358 2359 2360
        rTables = dbc.getQueryResult()

        bList = TaskExecutor.BoundedList()
S
Shuduo Sang 已提交
2361
        for rTbName in rTables:  # regular tables
2362 2363
            dbc.query("select speed from db.{}".format(rTbName[0]))
            numbers = dbc.getQueryResult()
S
Shuduo Sang 已提交
2364
            for row in numbers:
2365 2366 2367 2368 2369 2370
                # print("<{}>".format(n), end="", flush=True)
                bList.add(row[0])

        print("Top numbers in DB right now: {}".format(bList))
        print("TDengine client execution is about to start in 2 seconds...")
        time.sleep(2.0)
S
Shuduo Sang 已提交
2371
        dbManager = None  # release?
2372 2373 2374 2375 2376

    def prepare(self):
        self._printLastNumbers()

    def run(self):
S
Shuduo Sang 已提交
2377
        if gConfig.auto_start_service:
2378 2379 2380
            svcMgr = SvcManager()
            svcMgr.startTaosService()

2381 2382
        self._printLastNumbers()

S
Shuduo Sang 已提交
2383
        dbManager = DbManager()  # Regular function
2384
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
2385
        self.tc = ThreadCoordinator(thPool, dbManager)
S
Shuduo Sang 已提交
2386

2387
        self.tc.run()
S
Steven Li 已提交
2388 2389
        # print("exec stats: {}".format(self.tc.getExecStats()))
        # print("TC failed = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2390
        if gConfig.auto_start_service:
2391
            svcMgr.stopTaosService()
2392 2393
        # Print exec status, etc., AFTER showing messages from the server
        self.conclude()
S
Steven Li 已提交
2394
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2395 2396
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
        return 1 if self.tc.isFailed() else 0
2397 2398 2399

    def conclude(self):
        self.tc.printStats()
S
Shuduo Sang 已提交
2400
        self.tc.getDbManager().cleanUp()
2401 2402 2403


class MainExec:
2404 2405 2406
    STATUS_STARTING = 1
    STATUS_RUNNING = 2
    STATUS_STOPPING = 3
2407
    STATUS_STOPPED = 4
2408 2409 2410 2411

    @classmethod
    def runClient(cls):
        clientManager = ClientManager()
S
Steven Li 已提交
2412
        return clientManager.run()
2413 2414 2415

    @classmethod
    def runService(cls):
2416 2417
        svcManager = SvcManager()
        svcManager.run()
2418 2419

    @classmethod
S
Shuduo Sang 已提交
2420
    def runTemp(cls):  # for debugging purposes
2421 2422
        # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
S
Shuduo Sang 已提交
2423
        # sTbName = dbState.getFixedSuperTableName()
2424 2425
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
S
Shuduo Sang 已提交
2426
        #     dbc.execute("use db")
2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))
S
Shuduo Sang 已提交
2438

2439 2440 2441 2442 2443 2444 2445 2446
        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
S
Shuduo Sang 已提交
2447
        #                 rTbName[0],
2448 2449
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
S
Shuduo Sang 已提交
2450
        #         for rTbName in rTables : # regular tables
2451
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
S
Shuduo Sang 已提交
2452
        #         logger.info("Initial READING operation is successful")
2453
        #     except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
2454 2455
        #         logger.info("Initial WRITE/READ error: {}".format(err))

2456 2457 2458
        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
S
Shuduo Sang 已提交
2459
        #     rows = dbc.query("show databases")
2460
        #     print("Rows: {}, time={}".format(rows, time.time()))
S
Shuduo Sang 已提交
2461 2462
        return

S
Steven Li 已提交
2463

2464
def main():
S
Shuduo Sang 已提交
2465 2466
    # Super cool Python argument library:
    # https://docs.python.org/3/library/argparse.html
2467 2468 2469 2470 2471 2472 2473 2474 2475
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
2476

2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497
    # parser.add_argument('-a', '--auto-start-service', action='store_true',                        
    #                     help='Automatically start/stop the TDengine service (default: false)')
    # parser.add_argument('-c', '--connector-type', action='store', default='native', type=str,
    #                     help='Connector type to use: native, rest, or mixed (default: 10)')
    # parser.add_argument('-d', '--debug', action='store_true',                        
    #                     help='Turn on DEBUG mode for more logging (default: false)')
    # parser.add_argument('-e', '--run-tdengine', action='store_true',                        
    #                     help='Run TDengine service in foreground (default: false)')
    # parser.add_argument('-l', '--larger-data', action='store_true',                        
    #                     help='Write larger amount of data during write operations (default: false)')
    # parser.add_argument('-p', '--per-thread-db-connection', action='store_true',                        
    #                     help='Use a single shared db connection (default: false)')
    # parser.add_argument('-r', '--record-ops', action='store_true',                        
    #                     help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')                    
    # parser.add_argument('-s', '--max-steps', action='store', default=1000, type=int,
    #                     help='Maximum number of steps to run (default: 100)')
    # parser.add_argument('-t', '--num-threads', action='store', default=5, type=int,
    #                     help='Number of threads to run (default: 10)')
    # parser.add_argument('-x', '--continue-on-exception', action='store_true',                        
    #                     help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')                        

S
Shuduo Sang 已提交
2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548
    parser.add_argument(
        '-a',
        '--auto-start-service',
        action='store_true',
        help='Automatically start/stop the TDengine service (default: false)')
    parser.add_argument(
        '-c',
        '--connector-type',
        action='store',
        default='native',
        type=str,
        help='Connector type to use: native, rest, or mixed (default: 10)')
    parser.add_argument(
        '-d',
        '--debug',
        action='store_true',
        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument(
        '-e',
        '--run-tdengine',
        action='store_true',
        help='Run TDengine service in foreground (default: false)')
    parser.add_argument(
        '-l',
        '--larger-data',
        action='store_true',
        help='Write larger amount of data during write operations (default: false)')
    parser.add_argument(
        '-p',
        '--per-thread-db-connection',
        action='store_true',
        help='Use a single shared db connection (default: false)')
    parser.add_argument(
        '-r',
        '--record-ops',
        action='store_true',
        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
    parser.add_argument(
        '-s',
        '--max-steps',
        action='store',
        default=1000,
        type=int,
        help='Maximum number of steps to run (default: 100)')
    parser.add_argument(
        '-t',
        '--num-threads',
        action='store',
        default=5,
        type=int,
        help='Number of threads to run (default: 10)')
2549 2550 2551 2552 2553
    parser.add_argument(
        '-x',
        '--continue-on-exception',
        action='store_true',
        help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')
2554

2555
    global gConfig
2556
    gConfig = parser.parse_args()
2557

2558
    # Logging Stuff
2559
    global logger
S
Shuduo Sang 已提交
2560 2561
    _logger = logging.getLogger('CrashGen')  # real logger
    _logger.addFilter(LoggingFilter())
2562 2563 2564
    ch = logging.StreamHandler()
    _logger.addHandler(ch)

S
Shuduo Sang 已提交
2565 2566
    # Logging adapter, to be used as a logger
    logger = MyLoggingAdapter(_logger, [])
2567

S
Shuduo Sang 已提交
2568 2569
    if (gConfig.debug):
        logger.setLevel(logging.DEBUG)  # default seems to be INFO
S
Steven Li 已提交
2570 2571
    else:
        logger.setLevel(logging.INFO)
S
Shuduo Sang 已提交
2572

2573 2574
    Dice.seed(0)  # initial seeding of dice

2575
    # Run server or client
S
Shuduo Sang 已提交
2576
    if gConfig.run_tdengine:  # run server
2577
        MainExec.runService()
S
Shuduo Sang 已提交
2578
    else:
S
Steven Li 已提交
2579
        return MainExec.runClient()
2580

S
Shuduo Sang 已提交
2581

2582
if __name__ == "__main__":
S
Steven Li 已提交
2583 2584 2585
    exitCode = main()
    # print("Exiting with code: {}".format(exitCode))
    sys.exit(exitCode)