crash_gen.py 92.1 KB
Newer Older
S
Shuduo Sang 已提交
1
# -----!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
S
Shuduo Sang 已提交
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
# For type hinting before definition, ref:
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations
import taos
import crash_gen
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.log import *
from queue import Queue, Empty
from typing import IO
from typing import Set
from typing import Dict
from typing import List
from requests.auth import HTTPBasicAuth
import textwrap
import datetime
import logging
import time
import random
import threading
import requests
import copy
import argparse
import getopt
39

S
Steven Li 已提交
40
import sys
41
import os
42 43
import io
import signal
44
import traceback
45 46 47 48
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
49

S
Shuduo Sang 已提交
50
# Global variables, tried to keep a small number.
51 52 53

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
S
Shuduo Sang 已提交
54
gConfig = argparse.Namespace()  # Dummy value, will be replaced later
55
logger = None
S
Steven Li 已提交
56

S
Shuduo Sang 已提交
57 58

def runThread(wt: WorkerThread):
59
    wt.run()
60

S
Shuduo Sang 已提交
61

62 63
class CrashGenError(Exception):
    def __init__(self, msg=None, errno=None):
S
Shuduo Sang 已提交
64
        self.msg = msg
65
        self.errno = errno
S
Shuduo Sang 已提交
66

67 68 69
    def __str__(self):
        return self.msg

S
Shuduo Sang 已提交
70

S
Steven Li 已提交
71
class WorkerThread:
S
Shuduo Sang 已提交
72 73 74 75 76
    def __init__(self, pool: ThreadPool, tid,
                 tc: ThreadCoordinator,
                 # te: TaskExecutor,
                 ):  # note: main thread context!
        # self._curStep = -1
77
        self._pool = pool
S
Shuduo Sang 已提交
78 79
        self._tid = tid
        self._tc = tc  # type: ThreadCoordinator
S
Steven Li 已提交
80
        # self.threadIdent = threading.get_ident()
81 82
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()
S
Steven Li 已提交
83

84
        # Let us have a DB connection of our own
S
Shuduo Sang 已提交
85
        if (gConfig.per_thread_db_connection):  # type: ignore
86
            # print("connector_type = {}".format(gConfig.connector_type))
S
Shuduo Sang 已提交
87 88
            self._dbConn = DbConn.createNative() if (
                gConfig.connector_type == 'native') else DbConn.createRest()
89

S
Shuduo Sang 已提交
90
        self._dbInUse = False  # if "use db" was executed already
91

92
    def logDebug(self, msg):
S
Steven Li 已提交
93
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
94 95

    def logInfo(self, msg):
S
Steven Li 已提交
96
        logger.info("    TRD[{}] {}".format(self._tid, msg))
97

98 99 100 101
    def dbInUse(self):
        return self._dbInUse

    def useDb(self):
S
Shuduo Sang 已提交
102
        if (not self._dbInUse):
103 104 105
            self.execSql("use db")
        self._dbInUse = True

106
    def getTaskExecutor(self):
S
Shuduo Sang 已提交
107
        return self._tc.getTaskExecutor()
108

S
Steven Li 已提交
109
    def start(self):
110
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
111

S
Shuduo Sang 已提交
112
    def run(self):
S
Steven Li 已提交
113
        # initialization after thread starts, in the thread context
114
        # self.isSleeping = False
115 116
        logger.info("Starting to run thread: {}".format(self._tid))

S
Shuduo Sang 已提交
117
        if (gConfig.per_thread_db_connection):  # type: ignore
118
            logger.debug("Worker thread openning database connection")
119
            self._dbConn.open()
S
Steven Li 已提交
120

S
Shuduo Sang 已提交
121 122
        self._doTaskLoop()

123
        # clean up
S
Shuduo Sang 已提交
124
        if (gConfig.per_thread_db_connection):  # type: ignore
125
            self._dbConn.close()
126

S
Shuduo Sang 已提交
127
    def _doTaskLoop(self):
128 129
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
S
Shuduo Sang 已提交
130 131
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
132
            tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
S
Shuduo Sang 已提交
133 134 135
            logger.debug(
                "[TRD] Worker thread [{}] exited barrier...".format(
                    self._tid))
136
            self.crossStepGate()   # then per-thread gate, after being tapped
S
Shuduo Sang 已提交
137 138 139
            logger.debug(
                "[TRD] Worker thread [{}] exited step gate...".format(
                    self._tid))
140
            if not self._tc.isRunning():
S
Shuduo Sang 已提交
141 142
                logger.debug(
                    "[TRD] Thread Coordinator not running any more, worker thread now stopping...")
143 144
                break

145
            # Fetch a task from the Thread Coordinator
S
Shuduo Sang 已提交
146 147 148
            logger.debug(
                "[TRD] Worker thread [{}] about to fetch task".format(
                    self._tid))
149
            task = tc.fetchTask()
150 151

            # Execute such a task
S
Shuduo Sang 已提交
152 153 154
            logger.debug(
                "[TRD] Worker thread [{}] about to execute task: {}".format(
                    self._tid, task.__class__.__name__))
155
            task.execute(self)
156
            tc.saveExecutedTask(task)
S
Shuduo Sang 已提交
157 158 159 160 161
            logger.debug(
                "[TRD] Worker thread [{}] finished executing task".format(
                    self._tid))

            self._dbInUse = False  # there may be changes between steps
162

S
Shuduo Sang 已提交
163 164
    def verifyThreadSelf(self):  # ensure we are called by this own thread
        if (threading.get_ident() != self._thread.ident):
S
Steven Li 已提交
165 166
            raise RuntimeError("Unexpectly called from other threads")

S
Shuduo Sang 已提交
167 168
    def verifyThreadMain(self):  # ensure we are called by the main thread
        if (threading.get_ident() != threading.main_thread().ident):
S
Steven Li 已提交
169 170 171
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
S
Shuduo Sang 已提交
172
        if (not self._thread.is_alive()):
S
Steven Li 已提交
173 174
            raise RuntimeError("Unexpected dead thread")

175
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
176 177
    def crossStepGate(self):
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
178 179
        self.verifyThreadSelf()  # only allowed by ourselves

180
        # Wait again at the "gate", waiting to be "tapped"
S
Shuduo Sang 已提交
181 182 183 184
        logger.debug(
            "[TRD] Worker thread {} about to cross the step gate".format(
                self._tid))
        self._stepGate.wait()
185
        self._stepGate.clear()
S
Shuduo Sang 已提交
186

187
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
188

S
Shuduo Sang 已提交
189
    def tapStepGate(self):  # give it a tap, release the thread waiting there
S
Steven Li 已提交
190
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
191 192
        self.verifyThreadMain()  # only allowed for main thread

S
Steven Li 已提交
193
        logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
S
Shuduo Sang 已提交
194 195
        self._stepGate.set()  # wake up!
        time.sleep(0)  # let the released thread run a bit
196

S
Shuduo Sang 已提交
197 198 199
    def execSql(self, sql):  # TODO: expose DbConn directly
        if (gConfig.per_thread_db_connection):
            return self._dbConn.execute(sql)
200
        else:
201
            return self._tc.getDbManager().getDbConn().execute(sql)
202

S
Shuduo Sang 已提交
203 204 205
    def querySql(self, sql):  # TODO: expose DbConn directly
        if (gConfig.per_thread_db_connection):
            return self._dbConn.query(sql)
206 207 208 209
        else:
            return self._tc.getDbManager().getDbConn().query(sql)

    def getQueryResult(self):
S
Shuduo Sang 已提交
210 211
        if (gConfig.per_thread_db_connection):
            return self._dbConn.getQueryResult()
212 213 214
        else:
            return self._tc.getDbManager().getDbConn().getQueryResult()

215
    def getDbConn(self):
S
Shuduo Sang 已提交
216 217
        if (gConfig.per_thread_db_connection):
            return self._dbConn
218
        else:
219
            return self._tc.getDbManager().getDbConn()
220

221 222
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
S
Shuduo Sang 已提交
223
    #         return self._dbConn.query(sql)
224 225
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
226

227
# The coordinator of all worker threads, mostly running in main thread
S
Shuduo Sang 已提交
228 229


230
class ThreadCoordinator:
231
    def __init__(self, pool: ThreadPool, dbManager):
S
Shuduo Sang 已提交
232
        self._curStep = -1  # first step is 0
233
        self._pool = pool
234
        # self._wd = wd
S
Shuduo Sang 已提交
235
        self._te = None  # prepare for every new step
236
        self._dbManager = dbManager
S
Shuduo Sang 已提交
237 238
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things
S
Steven Li 已提交
239

S
Shuduo Sang 已提交
240 241
        self._stepBarrier = threading.Barrier(
            self._pool.numThreads + 1)  # one barrier for all threads
242
        self._execStats = ExecutionStats()
243
        self._runStatus = MainExec.STATUS_RUNNING
S
Steven Li 已提交
244

245 246 247
    def getTaskExecutor(self):
        return self._te

S
Shuduo Sang 已提交
248
    def getDbManager(self) -> DbManager:
249
        return self._dbManager
250

251 252 253
    def crossStepBarrier(self):
        self._stepBarrier.wait()

254 255 256 257
    def requestToStop(self):
        self._runStatus = MainExec.STATUS_STOPPING
        self._execStats.registerFailure("User Interruption")

S
Shuduo Sang 已提交
258
    def run(self):
259
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
260 261

        # Coordinate all threads step by step
S
Shuduo Sang 已提交
262 263 264
        self._curStep = -1  # not started yet
        maxSteps = gConfig.max_steps  # type: ignore
        self._execStats.startExec()  # start the stop watch
265 266
        transitionFailed = False
        hasAbortedTask = False
S
Shuduo Sang 已提交
267 268 269 270 271 272 273 274
        while(self._curStep < maxSteps - 1 and
              (not transitionFailed) and
              (self._runStatus == MainExec.STATUS_RUNNING) and
                (not hasAbortedTask)):  # maxStep==10, last curStep should be 9

            if not gConfig.debug:
                # print this only if we are not in debug mode
                print(".", end="", flush=True)
S
Steven Li 已提交
275
            logger.debug("[TRD] Main thread going to sleep")
276

277
            # Now main thread (that's us) is ready to enter a step
S
Shuduo Sang 已提交
278 279 280 281
            # let other threads go past the pool barrier, but wait at the
            # thread gate
            self.crossStepBarrier()
            self._stepBarrier.reset()  # Other worker threads should now be at the "gate"
282 283

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
S
Shuduo Sang 已提交
284 285
            # We use this period to do house keeping work, when all worker
            # threads are QUIET.
286
            hasAbortedTask = False
S
Shuduo Sang 已提交
287 288
            for task in self._executedTasks:
                if task.isAborted():
289 290 291 292
                    print("Task aborted: {}".format(task))
                    hasAbortedTask = True
                    break

S
Shuduo Sang 已提交
293
            if hasAbortedTask:  # do transition only if tasks are error free
294
                self._execStats.registerFailure("Aborted Task Encountered")
S
Shuduo Sang 已提交
295
            else:
296 297 298
                try:
                    sm = self._dbManager.getStateMachine()
                    logger.debug("[STT] starting transitions")
S
Shuduo Sang 已提交
299 300
                    # at end of step, transiton the DB state
                    sm.transition(self._executedTasks)
301
                    logger.debug("[STT] transition ended")
S
Shuduo Sang 已提交
302 303 304
                    # Due to limitation (or maybe not) of the Python library,
                    # we cannot share connections across threads
                    if sm.hasDatabase():
305 306 307
                        for t in self._pool.threadList:
                            logger.debug("[DB] use db for all worker threads")
                            t.useDb()
S
Shuduo Sang 已提交
308 309
                            # t.execSql("use db") # main thread executing "use
                            # db" on behalf of every worker thread
310
                except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
311
                    if (err.msg == 'network unavailable'):  # broken DB connection
312 313 314
                        logger.info("DB connection broken, execution failed")
                        traceback.print_stack()
                        transitionFailed = True
S
Shuduo Sang 已提交
315
                        self._te = None  # Not running any more
316
                        self._execStats.registerFailure("Broken DB Connection")
S
Shuduo Sang 已提交
317 318
                        # continue # don't do that, need to tap all threads at
                        # end, and maybe signal them to stop
319
                    else:
S
Shuduo Sang 已提交
320
                        raise
321 322
            # finally:
            #     pass
S
Shuduo Sang 已提交
323 324

            self.resetExecutedTasks()  # clear the tasks after we are done
325 326

            # Get ready for next step
S
Steven Li 已提交
327
            logger.debug("<-- Step {} finished".format(self._curStep))
S
Shuduo Sang 已提交
328 329 330 331
            self._curStep += 1  # we are about to get into next step. TODO: race condition here!
            # Now not all threads had time to go to sleep
            logger.debug(
                "\r\n\n--> Step {} starts with main thread waking up".format(self._curStep))
332

333
            # A new TE for the new step
S
Shuduo Sang 已提交
334
            if not transitionFailed:  # only if not failed
335
                self._te = TaskExecutor(self._curStep)
336

S
Shuduo Sang 已提交
337 338 339 340 341 342
            logger.debug(
                "[TRD] Main thread waking up at step {}, tapping worker threads".format(
                    self._curStep))  # Now not all threads had time to go to sleep
            # Worker threads will wake up at this point, and each execute it's
            # own task
            self.tapAllThreads()
S
Steven Li 已提交
343

344
        logger.debug("Main thread ready to finish up...")
S
Shuduo Sang 已提交
345 346
        if not transitionFailed:  # only in regular situations
            self.crossStepBarrier()  # Cross it one last time, after all threads finish
347 348
            self._stepBarrier.reset()
            logger.debug("Main thread in exclusive zone...")
S
Shuduo Sang 已提交
349
            self._te = None  # No more executor, time to end
350
            logger.debug("Main thread tapping all threads one last time...")
S
Shuduo Sang 已提交
351
            self.tapAllThreads()  # Let the threads run one last time
352

353
        logger.debug("Main thread joining all threads")
S
Shuduo Sang 已提交
354
        self._pool.joinAll()  # Get all threads to finish
355
        logger.info("\nAll worker threads finished")
356 357
        self._execStats.endExec()

358 359
    def printStats(self):
        self._execStats.printStats()
S
Steven Li 已提交
360

S
Steven Li 已提交
361 362 363 364 365 366
    def isFailed(self):
        return self._execStats.isFailed()

    def getExecStats(self):
        return self._execStats

S
Shuduo Sang 已提交
367
    def tapAllThreads(self):  # in a deterministic manner
S
Steven Li 已提交
368
        wakeSeq = []
S
Shuduo Sang 已提交
369 370
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
S
Steven Li 已提交
371 372 373
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
S
Shuduo Sang 已提交
374 375 376
        logger.debug(
            "[TRD] Main thread waking up worker threads: {}".format(
                str(wakeSeq)))
377
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
378
        for i in wakeSeq:
S
Shuduo Sang 已提交
379 380 381
            # TODO: maybe a bit too deep?!
            self._pool.threadList[i].tapStepGate()
            time.sleep(0)  # yield
S
Steven Li 已提交
382

383
    def isRunning(self):
S
Shuduo Sang 已提交
384
        return self._te is not None
385

S
Shuduo Sang 已提交
386 387
    def fetchTask(self) -> Task:
        if (not self.isRunning()):  # no task
388
            raise RuntimeError("Cannot fetch task when not running")
389 390
        # return self._wd.pickTask()
        # Alternatively, let's ask the DbState for the appropriate task
391 392 393 394 395 396 397
        # dbState = self.getDbState()
        # tasks = dbState.getTasksAtState() # TODO: create every time?
        # nTasks = len(tasks)
        # i = Dice.throw(nTasks)
        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
        # return tasks[i].clone() # TODO: still necessary?
S
Shuduo Sang 已提交
398 399 400 401 402
        # pick a task type for current state
        taskType = self.getDbManager().getStateMachine().pickTaskType()
        return taskType(
            self.getDbManager(),
            self._execStats)  # create a task from it
403 404

    def resetExecutedTasks(self):
S
Shuduo Sang 已提交
405
        self._executedTasks = []  # should be under single thread
406 407 408 409

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
410 411

# We define a class to run a number of threads in locking steps.
S
Shuduo Sang 已提交
412 413


414
class ThreadPool:
415
    def __init__(self, numThreads, maxSteps):
416 417 418 419
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
S
Shuduo Sang 已提交
420 421
        self.threadList = []  # type: List[WorkerThread]

422
    # starting to run all the threads, in locking steps
423
    def createAndStartThreads(self, tc: ThreadCoordinator):
S
Shuduo Sang 已提交
424 425
        for tid in range(0, self.numThreads):  # Create the threads
            workerThread = WorkerThread(self, tid, tc)
426
            self.threadList.append(workerThread)
S
Shuduo Sang 已提交
427
            workerThread.start()  # start, but should block immediately before step 0
428 429 430 431 432 433

    def joinAll(self):
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

434 435
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Shuduo Sang 已提交
436 437


S
Steven Li 已提交
438 439
class LinearQueue():
    def __init__(self):
440
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
441
        self.lastIndex = 0
S
Shuduo Sang 已提交
442 443
        self._lock = threading.RLock()  # our functions may call each other
        self.inUse = set()  # the indexes that are in use right now
S
Steven Li 已提交
444

445
    def toText(self):
S
Shuduo Sang 已提交
446 447
        return "[{}..{}], in use: {}".format(
            self.firstIndex, self.lastIndex, self.inUse)
448 449

    # Push (add new element, largest) to the tail, and mark it in use
S
Shuduo Sang 已提交
450
    def push(self):
451
        with self._lock:
S
Shuduo Sang 已提交
452 453
            # if ( self.isEmpty() ):
            #     self.lastIndex = self.firstIndex
454
            #     return self.firstIndex
455 456
            # Otherwise we have something
            self.lastIndex += 1
457 458
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
459
            return self.lastIndex
S
Steven Li 已提交
460 461

    def pop(self):
462
        with self._lock:
S
Shuduo Sang 已提交
463 464 465 466
            if (self.isEmpty()):
                # raise RuntimeError("Cannot pop an empty queue")
                return False  # TODO: None?

467
            index = self.firstIndex
S
Shuduo Sang 已提交
468
            if (index in self.inUse):
469 470
                return False

471 472 473 474 475 476 477
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
478
        with self._lock:
479 480 481 482
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
483
    def allocate(self, i):
484
        with self._lock:
485
            # logger.debug("LQ allocating item {}".format(i))
S
Shuduo Sang 已提交
486 487 488
            if (i in self.inUse):
                raise RuntimeError(
                    "Cannot re-use same index in queue: {}".format(i))
489 490
            self.inUse.add(i)

S
Steven Li 已提交
491
    def release(self, i):
492
        with self._lock:
493
            # logger.debug("LQ releasing item {}".format(i))
S
Shuduo Sang 已提交
494
            self.inUse.remove(i)  # KeyError possible, TODO: why?
495 496 497 498

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
499
    def pickAndAllocate(self):
S
Shuduo Sang 已提交
500
        if (self.isEmpty()):
501 502
            return None
        with self._lock:
S
Shuduo Sang 已提交
503
            cnt = 0  # counting the interations
504 505
            while True:
                cnt += 1
S
Shuduo Sang 已提交
506
                if (cnt > self.size() * 10):  # 10x iteration already
507 508
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
S
Shuduo Sang 已提交
509 510
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if (ret not in self.inUse):
511 512 513
                    self.allocate(ret)
                    return ret

S
Shuduo Sang 已提交
514

515
class DbConn:
516 517 518 519 520 521 522 523 524 525 526
    TYPE_NATIVE = "native-c"
    TYPE_REST = "rest-api"
    TYPE_INVALID = "invalid"

    @classmethod
    def create(cls, connType):
        if connType == cls.TYPE_NATIVE:
            return DbConnNative()
        elif connType == cls.TYPE_REST:
            return DbConnRest()
        else:
S
Shuduo Sang 已提交
527 528
            raise RuntimeError(
                "Unexpected connection type: {}".format(connType))
529 530 531 532 533 534 535 536 537

    @classmethod
    def createNative(cls):
        return cls.create(cls.TYPE_NATIVE)

    @classmethod
    def createRest(cls):
        return cls.create(cls.TYPE_REST)

538 539
    def __init__(self):
        self.isOpen = False
540 541 542
        self._type = self.TYPE_INVALID

    def open(self):
S
Shuduo Sang 已提交
543
        if (self.isOpen):
544 545
            raise RuntimeError("Cannot re-open an existing DB connection")

546 547
        # below implemented by child classes
        self.openByType()
548

S
Shuduo Sang 已提交
549 550 551
        logger.debug(
            "[DB] data connection opened, type = {}".format(
                self._type))
552 553
        self.isOpen = True

S
Shuduo Sang 已提交
554 555 556 557
    def resetDb(self):  # reset the whole database, etc.
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot reset database until connection is open")
558 559
        # self._tdSql.prepare() # Recreate database, etc.

560
        self.execute('drop database if exists db')
561 562
        logger.debug("Resetting DB, dropped database")
        # self._cursor.execute('create database db')
563
        # self._cursor.execute('use db')
564 565
        # tdSql.execute('show databases')

S
Shuduo Sang 已提交
566
    def queryScalar(self, sql) -> int:
567 568
        return self._queryAny(sql)

S
Shuduo Sang 已提交
569
    def queryString(self, sql) -> str:
570 571
        return self._queryAny(sql)

S
Shuduo Sang 已提交
572 573 574 575
    def _queryAny(self, sql):  # actual query result as an int
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
576
        nRows = self.query(sql)
S
Shuduo Sang 已提交
577 578 579 580
        if nRows != 1:
            raise RuntimeError(
                "Unexpected result for query: {}, rows = {}".format(
                    sql, nRows))
581
        if self.getResultRows() != 1 or self.getResultCols() != 1:
S
Shuduo Sang 已提交
582 583
            raise RuntimeError(
                "Unexpected result set for query: {}".format(sql))
584 585 586 587
        return self.getQueryResult()[0][0]

    def execute(self, sql):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
588

589 590
    def openByType(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
591

592 593
    def getQueryResult(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
594

595 596
    def getResultRows(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
597

598 599 600 601
    def getResultCols(self):
        raise RuntimeError("Unexpected execution, should be overriden")

# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql
S
Shuduo Sang 已提交
602 603


604 605 606 607
class DbConnRest(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
608
        self._url = "http://localhost:6020/rest/sql"  # fixed for now
609 610
        self._result = None

S
Shuduo Sang 已提交
611 612 613
    def openByType(self):  # Open connection
        pass  # do nothing, always open

614
    def close(self):
S
Shuduo Sang 已提交
615 616 617
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
618 619 620 621 622
        # Do nothing for REST
        logger.debug("[DB] REST Database connection closed")
        self.isOpen = False

    def _doSql(self, sql):
623 624 625 626 627 628 629
        try:
            r = requests.post(self._url, 
                data = sql,
                auth = HTTPBasicAuth('root', 'taosdata'))            
        except:
            print("REST API Failure (TODO: more info here)")
            raise
630 631
        rj = r.json()
        # Sanity check for the "Json Result"
S
Shuduo Sang 已提交
632
        if ('status' not in rj):
633 634
            raise RuntimeError("No status in REST response")

S
Shuduo Sang 已提交
635 636 637 638
        if rj['status'] == 'error':  # clearly reported error
            if ('code' not in rj):  # error without code
                raise RuntimeError("REST error return without code")
            errno = rj['code']  # May need to massage this in the future
639
            # print("Raising programming error with REST return: {}".format(rj))
S
Shuduo Sang 已提交
640 641
            raise taos.error.ProgrammingError(
                rj['desc'], errno)  # todo: check existance of 'desc'
642

S
Shuduo Sang 已提交
643 644 645 646
        if rj['status'] != 'succ':  # better be this
            raise RuntimeError(
                "Unexpected REST return status: {}".format(
                    rj['status']))
647 648

        nRows = rj['rows'] if ('rows' in rj) else 0
S
Shuduo Sang 已提交
649
        self._result = rj
650 651
        return nRows

S
Shuduo Sang 已提交
652 653 654 655
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
656 657
        logger.debug("[SQL-REST] Executing SQL: {}".format(sql))
        nRows = self._doSql(sql)
S
Shuduo Sang 已提交
658 659
        logger.debug(
            "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
660 661
        return nRows

S
Shuduo Sang 已提交
662
    def query(self, sql):  # return rows affected
663 664 665 666 667 668 669 670 671 672 673 674 675
        return self.execute(sql)

    def getQueryResult(self):
        return self._result['data']

    def getResultRows(self):
        print(self._result)
        raise RuntimeError("TBD")
        # return self._tdSql.queryRows

    def getResultCols(self):
        print(self._result)
        raise RuntimeError("TBD")
S
Shuduo Sang 已提交
676

677
    # Duplicate code from TDMySQL, TODO: merge all this into DbConnNative
678 679


680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707
class MyTDSql:
    def __init__(self):
        self.queryRows = 0
        self.queryCols = 0
        self.affectedRows = 0

    def init(self, cursor, log=True):
        self.cursor = cursor
        # if (log):
        #     caller = inspect.getframeinfo(inspect.stack()[1][0])
        #     self.cursor.log(caller.filename + ".sql")

    def close(self):
        self.cursor.close()

    def query(self, sql):
        self.sql = sql
        try:
            self.cursor.execute(sql)
            self.queryResult = self.cursor.fetchall()
            self.queryRows = len(self.queryResult)
            self.queryCols = len(self.cursor.description)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.queryRows
708

709 710 711 712 713 714 715 716 717 718 719
    def execute(self, sql):
        self.sql = sql
        try:
            self.affectedRows = self.cursor.execute(sql)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.affectedRows

S
Shuduo Sang 已提交
720

721 722 723 724
class DbConnNative(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
725
        self._conn = None
726
        self._cursor = None
S
Shuduo Sang 已提交
727

728 729 730 731 732 733 734 735 736 737 738
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
S
Shuduo Sang 已提交
739
                    buildPath = root[:len(root) - len("/build/bin")]
740 741 742
                    break
        return buildPath

S
Shuduo Sang 已提交
743
    def openByType(self):  # Open connection
744
        cfgPath = self.getBuildPath() + "/test/cfg"
S
Shuduo Sang 已提交
745 746 747
        self._conn = taos.connect(
            host="127.0.0.1",
            config=cfgPath)  # TODO: make configurable
748 749 750 751
        self._cursor = self._conn.cursor()

        # Get the connection/cursor ready
        self._cursor.execute('reset query cache')
S
Shuduo Sang 已提交
752 753
        # self._cursor.execute('use db') # do this at the beginning of every
        # step
754 755

        # Open connection
756
        self._tdSql = MyTDSql()
757
        self._tdSql.init(self._cursor)
S
Shuduo Sang 已提交
758

759
    def close(self):
S
Shuduo Sang 已提交
760 761 762
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
763
        self._tdSql.close()
764
        logger.debug("[DB] Database connection closed")
765
        self.isOpen = False
S
Steven Li 已提交
766

S
Shuduo Sang 已提交
767 768 769 770
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
771 772
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
S
Shuduo Sang 已提交
773 774 775
        logger.debug(
            "[SQL] Execution Result, nRows = {}, SQL = {}".format(
                nRows, sql))
776
        return nRows
S
Steven Li 已提交
777

S
Shuduo Sang 已提交
778 779 780 781
    def query(self, sql):  # return rows affected
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
782 783
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
S
Shuduo Sang 已提交
784 785 786
        logger.debug(
            "[SQL] Query Result, nRows = {}, SQL = {}".format(
                nRows, sql))
787
        return nRows
788
        # results are in: return self._tdSql.queryResult
789

790 791 792
    def getQueryResult(self):
        return self._tdSql.queryResult

793 794
    def getResultRows(self):
        return self._tdSql.queryRows
795

796 797
    def getResultCols(self):
        return self._tdSql.queryCols
798

S
Shuduo Sang 已提交
799

800
class AnyState:
S
Shuduo Sang 已提交
801 802 803
    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, no even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
804
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
S
Shuduo Sang 已提交
805
    STATE_HAS_DATA = 3  # we have some data in the table
806 807 808 809 810
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
811 812
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
813 814 815 816 817 818 819
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Shuduo Sang 已提交
820 821
        # -1 hack to accomodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]
822 823 824 825

    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
826 827 828 829 830 831
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
S
Shuduo Sang 已提交
832 833 834
            raise RuntimeError(
                "Unexpected comparison, type = {}".format(
                    type(other)))
S
Steven Li 已提交
835

836 837 838
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
839 840 841
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

842 843
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
S
Shuduo Sang 已提交
844

845 846
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
S
Shuduo Sang 已提交
847

848 849
    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]
S
Shuduo Sang 已提交
850

851 852
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
853

854 855
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
856

857 858
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
S
Shuduo Sang 已提交
859

860 861 862 863 864
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
S
Shuduo Sang 已提交
865
        for task in tasks:
866 867 868
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
869
                # task.logDebug("Task success found")
870
                sCnt += 1
S
Shuduo Sang 已提交
871 872 873
                if (sCnt >= 2):
                    raise RuntimeError(
                        "Unexpected more than 1 success with task: {}".format(cls))
874 875 876 877

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
S
Shuduo Sang 已提交
878
        for task in tasks:
879 880
            if not isinstance(task, cls):
                continue
S
Shuduo Sang 已提交
881
            exists = True  # we have a valid instance
882 883
            if task.isSuccess():
                sCnt += 1
S
Shuduo Sang 已提交
884 885 886
        if (exists and sCnt <= 0):
            raise RuntimeError(
                "Unexpected zero success for task: {}".format(cls))
887 888

    def assertNoTask(self, tasks, cls):
S
Shuduo Sang 已提交
889
        for task in tasks:
890
            if isinstance(task, cls):
S
Shuduo Sang 已提交
891 892
                raise CrashGenError(
                    "This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))
893 894

    def assertNoSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
895
        for task in tasks:
896 897
            if isinstance(task, cls):
                if task.isSuccess():
S
Shuduo Sang 已提交
898 899
                    raise RuntimeError(
                        "Unexpected successful task: {}".format(cls))
900 901

    def hasSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
902
        for task in tasks:
903 904 905 906 907 908
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
909
    def hasTask(self, tasks, cls):
S
Shuduo Sang 已提交
910
        for task in tasks:
S
Steven Li 已提交
911 912 913 914
            if isinstance(task, cls):
                return True
        return False

S
Shuduo Sang 已提交
915

916 917 918 919
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
S
Shuduo Sang 已提交
920 921 922
            False, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
923 924 925 926
        ]

    # def verifyTasksToState(self, tasks, newState):

S
Shuduo Sang 已提交
927

928 929 930 931
class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
S
Shuduo Sang 已提交
932 933 934
            True, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
935 936
        ]

S
Shuduo Sang 已提交
937 938
    def verifyTasksToState(self, tasks, newState):
        if (self.hasSuccess(tasks, TaskCreateDb)
939
                ):  # at EMPTY, if there's succes in creating DB
S
Shuduo Sang 已提交
940 941 942 943
            if (not self.hasTask(tasks, TaskDropDb)):  # and no drop_db tasks
                # we must have at most one. TODO: compare numbers
                self.assertAtMostOneSuccess(tasks, TaskCreateDb)

944 945 946 947 948 949 950 951 952 953 954

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
955 956 957
        if (not self.hasTask(tasks, TaskCreateDb)):
            # only if we don't create any more
            self.assertAtMostOneSuccess(tasks, TaskDropDb)
958
        self.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Steven Li 已提交
959
        # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not true in massively parrallel cases
960
        # Nothing to be said about adding data task
961
        # if ( self.hasSuccess(tasks, DropDbTask) ): # dropped the DB
S
Shuduo Sang 已提交
962 963 964
        # self.assertHasTask(tasks, DropDbTask) # implied by hasSuccess
        # self.assertAtMostOneSuccess(tasks, DropDbTask)
        # self._state = self.STATE_EMPTY
965 966
        # if ( self.hasSuccess(tasks, TaskCreateSuperTable) ): # did not drop db, create table success
        #     # self.assertHasTask(tasks, CreateFixedTableTask) # tried to create table
S
Shuduo Sang 已提交
967
        #     if ( not self.hasTask(tasks, TaskDropSuperTable) ):
968
        #         self.assertAtMostOneSuccess(tasks, TaskCreateSuperTable) # at most 1 attempt is successful, if we don't drop anything
S
Shuduo Sang 已提交
969 970 971 972 973 974
        # self.assertNoTask(tasks, DropDbTask) # should have have tried
        # if ( not self.hasSuccess(tasks, AddFixedDataTask) ): # just created table, no data yet
        #     # can't say there's add-data attempts, since they may all fail
        #     self._state = self.STATE_TABLE_ONLY
        # else:
        #     self._state = self.STATE_HAS_DATA
975 976 977 978
        # What about AddFixedData?
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ):
        #     self._state = self.STATE_HAS_DATA
        # else: # no success in dropping db tasks, no success in create fixed table? read data should also fail
S
Shuduo Sang 已提交
979
        #     # raise RuntimeError("Unexpected no-success scenario")   # We might just landed all failure tasks,
980 981
        #     self._state = self.STATE_DB_ONLY  # no change

S
Shuduo Sang 已提交
982

983
class StateSuperTableOnly(AnyState):
984 985 986 987 988 989 990 991 992
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
993
        if (self.hasSuccess(tasks, TaskDropSuperTable)
994
                ):  # we are able to drop the table
995
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
996 997
            # we must have had recreted it
            self.hasSuccess(tasks, TaskCreateSuperTable)
998

999
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
1000 1001
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
1002
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
1003 1004 1005
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
1006
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
1007 1008 1009
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
1010

S
Shuduo Sang 已提交
1011

1012 1013 1014 1015 1016 1017 1018 1019 1020 1021
class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1022
        if (newState.equals(AnyState.STATE_EMPTY)):
1023
            self.hasSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1024 1025 1026 1027
            if (not self.hasTask(tasks, TaskCreateDb)):
                self.assertAtMostOneSuccess(tasks, TaskDropDb)  # TODO: dicy
        elif (newState.equals(AnyState.STATE_DB_ONLY)):  # in DB only
            if (not self.hasTask(tasks, TaskCreateDb)
1028
                ):  # without a create_db task
S
Shuduo Sang 已提交
1029 1030
                # we must have drop_db task
                self.assertNoTask(tasks, TaskDropDb)
1031
            self.hasSuccess(tasks, TaskDropSuperTable)
1032
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
1033 1034 1035 1036
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
1037
            # self.hasSuccess(tasks, DeleteDataTasks)
S
Shuduo Sang 已提交
1038 1039
        else:  # should be STATE_HAS_DATA
            if (not self.hasTask(tasks, TaskCreateDb)
1040
                ):  # only if we didn't create one
S
Shuduo Sang 已提交
1041 1042 1043
                # we shouldn't have dropped it
                self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)
1044
                    ):  # if we didn't create the table
S
Shuduo Sang 已提交
1045 1046
                # we should not have a task that drops it
                self.assertNoTask(tasks, TaskDropSuperTable)
1047
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
1048

S
Shuduo Sang 已提交
1049

1050
class StateMechine:
1051 1052
    def __init__(self, dbConn):
        self._dbConn = dbConn
S
Shuduo Sang 已提交
1053 1054 1055 1056 1057
        self._curState = self._findCurrentState()  # starting state
        # transitition target probabilities, indexed with value of STATE_EMPTY,
        # STATE_DB_ONLY, etc.
        self._stateWeights = [1, 3, 5, 15]

1058 1059 1060
    def getCurrentState(self):
        return self._curState

1061 1062 1063
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

1064
    # May be slow, use cautionsly...
S
Shuduo Sang 已提交
1065
    def getTaskTypes(self):  # those that can run (directly/indirectly) from the current state
1066 1067 1068 1069 1070 1071
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

S
Shuduo Sang 已提交
1072
        allTaskClasses = StateTransitionTask.__subclasses__()  # all state transition tasks
1073 1074
        firstTaskTypes = []
        for tc in allTaskClasses:
S
Shuduo Sang 已提交
1075
            # t = tc(self) # create task object
1076 1077
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
S
Shuduo Sang 已提交
1078 1079 1080 1081 1082 1083 1084 1085
        # now we have all the tasks that can begin directly from the current
        # state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy()  # have to have these
        for task1 in firstTaskTypes:  # each task type gathered so far
            endState = task1.getEndState()  # figure the end state
            if endState is None:  # does not change end state
                continue  # no use, do nothing
            for tc in allTaskClasses:  # what task can further begin from there?
1086
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
S
Shuduo Sang 已提交
1087
                    taskTypes.append(tc)  # gather it
1088 1089

        if len(taskTypes) <= 0:
S
Shuduo Sang 已提交
1090 1091 1092 1093 1094 1095 1096
            raise RuntimeError(
                "No suitable task types found for state: {}".format(
                    self._curState))
        logger.debug(
            "[OPS] Tasks found for state {}: {}".format(
                self._curState,
                typesToStrings(taskTypes)))
1097 1098 1099 1100
        return taskTypes

    def _findCurrentState(self):
        dbc = self._dbConn
S
Shuduo Sang 已提交
1101 1102
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
        if dbc.query("show databases") == 0:  # no database?!
1103
            # logger.debug("Found EMPTY state")
S
Shuduo Sang 已提交
1104 1105 1106
            logger.debug(
                "[STT] empty database found, between {} and {}".format(
                    ts, time.time()))
1107
            return StateEmpty()
S
Shuduo Sang 已提交
1108 1109 1110 1111
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
        dbc.execute("use db")
        if dbc.query("show tables") == 0:  # no tables
1112
            # logger.debug("Found DB ONLY state")
S
Shuduo Sang 已提交
1113 1114 1115
            logger.debug(
                "[STT] DB_ONLY found, between {} and {}".format(
                    ts, time.time()))
1116
            return StateDbOnly()
S
Shuduo Sang 已提交
1117 1118
        if dbc.query("SELECT * FROM db.{}".format(DbManager.getFixedSuperTableName())
                     ) == 0:  # no regular tables
1119
            # logger.debug("Found TABLE_ONLY state")
S
Shuduo Sang 已提交
1120 1121 1122
            logger.debug(
                "[STT] SUPER_TABLE_ONLY found, between {} and {}".format(
                    ts, time.time()))
1123
            return StateSuperTableOnly()
S
Shuduo Sang 已提交
1124
        else:  # has actual tables
1125
            # logger.debug("Found HAS_DATA state")
S
Shuduo Sang 已提交
1126 1127 1128
            logger.debug(
                "[STT] HAS_DATA found, between {} and {}".format(
                    ts, time.time()))
1129 1130 1131
            return StateHasData()

    def transition(self, tasks):
S
Shuduo Sang 已提交
1132
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
1133
            logger.debug("[STT] Starting State: {}".format(self._curState))
S
Shuduo Sang 已提交
1134
            return  # do nothing
1135

S
Shuduo Sang 已提交
1136 1137
        # this should show up in the server log, separating steps
        self._dbConn.execute("show dnodes")
1138 1139 1140 1141

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
1142 1143
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
            # case of multiple creation and drops
1144 1145 1146

        if self._curState.canDropDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1147 1148
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
            # case of drop-create-drop
1149 1150 1151

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
S
Shuduo Sang 已提交
1152 1153
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not
            # really, in case of create-drop-create
1154 1155 1156

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
S
Shuduo Sang 已提交
1157 1158
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not
            # really in case of drop-create-drop
1159 1160

        # if self._state.canAddData():
S
Shuduo Sang 已提交
1161 1162
        # self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true
        # actually
1163 1164 1165 1166 1167 1168

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
S
Shuduo Sang 已提交
1169 1170
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
1171 1172 1173
        self._curState = newState

    def pickTaskType(self):
S
Shuduo Sang 已提交
1174 1175
        # all the task types we can choose from at curent state
        taskTypes = self.getTaskTypes()
1176 1177 1178
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
S
Shuduo Sang 已提交
1179 1180 1181
            if endState is not None:
                # TODO: change to a method
                weights.append(self._stateWeights[endState.getValIndex()])
1182
            else:
S
Shuduo Sang 已提交
1183 1184
                # read data task, default to 10: TODO: change to a constant
                weights.append(10)
1185
        i = self._weighted_choice_sub(weights)
S
Shuduo Sang 已提交
1186
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
1187 1188
        return taskTypes[i]

S
Shuduo Sang 已提交
1189 1190 1191 1192 1193
    # ref:
    # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
    def _weighted_choice_sub(self, weights):
        # TODO: use our dice to ensure it being determinstic?
        rnd = random.random() * sum(weights)
1194 1195 1196 1197
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
1198

1199
# Manager of the Database Data/Connection
S
Shuduo Sang 已提交
1200 1201 1202 1203


class DbManager():
    def __init__(self, resetDb=True):
S
Steven Li 已提交
1204
        self.tableNumQueue = LinearQueue()
S
Shuduo Sang 已提交
1205 1206 1207
        # datetime.datetime(2019, 1, 1) # initial date time tick
        self._lastTick = self.setupLastTick()
        self._lastInt = 0  # next one is initial integer
1208
        self._lock = threading.RLock()
S
Shuduo Sang 已提交
1209

1210
        # self.openDbServerConnection()
S
Shuduo Sang 已提交
1211 1212
        self._dbConn = DbConn.createNative() if (
            gConfig.connector_type == 'native') else DbConn.createRest()
1213
        try:
S
Shuduo Sang 已提交
1214
            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
1215 1216
        except taos.error.ProgrammingError as err:
            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
S
Shuduo Sang 已提交
1217 1218 1219
            if (err.msg == 'client disconnected'):  # cannot open DB connection
                print(
                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
S
Steven Li 已提交
1220
                sys.exit(2)
1221
            else:
S
Shuduo Sang 已提交
1222 1223
                raise
        except BaseException:
S
Steven Li 已提交
1224
            print("[=] Unexpected exception")
S
Shuduo Sang 已提交
1225 1226 1227 1228
            raise

        if resetDb:
            self._dbConn.resetDb()  # drop and recreate DB
1229

S
Shuduo Sang 已提交
1230 1231
        # Do this after dbConn is in proper shape
        self._stateMachine = StateMechine(self._dbConn)
1232

1233 1234 1235
    def getDbConn(self):
        return self._dbConn

S
Shuduo Sang 已提交
1236
    def getStateMachine(self) -> StateMechine:
1237 1238 1239 1240
        return self._stateMachine

    # def getState(self):
    #     return self._stateMachine.getCurrentState()
1241 1242 1243 1244 1245 1246

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
1247
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
1248
    def setupLastTick(self):
1249
        t1 = datetime.datetime(2020, 6, 1)
1250
        t2 = datetime.datetime.now()
S
Shuduo Sang 已提交
1251 1252 1253 1254
        # maybe a very large number, takes 69 years to exceed Python int range
        elSec = int(t2.timestamp() - t1.timestamp())
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * \
            500  # a number representing seconds within 10 years
1255
        # print("elSec = {}".format(elSec))
S
Shuduo Sang 已提交
1256 1257 1258
        t3 = datetime.datetime(2012, 1, 1)  # default "keep" is 10 years
        t4 = datetime.datetime.fromtimestamp(
            t3.timestamp() + elSec2)  # see explanation above
1259 1260 1261
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

S
Shuduo Sang 已提交
1262
    def pickAndAllocateTable(self):  # pick any table, and "use" it
S
Steven Li 已提交
1263 1264
        return self.tableNumQueue.pickAndAllocate()

1265 1266 1267 1268 1269
    def addTable(self):
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

1270 1271
    @classmethod
    def getFixedSuperTableName(cls):
1272
        return "fs_table"
1273

S
Shuduo Sang 已提交
1274
    def releaseTable(self, i):  # return the table back, so others can use it
S
Steven Li 已提交
1275 1276
        self.tableNumQueue.release(i)

1277
    def getNextTick(self):
S
Shuduo Sang 已提交
1278 1279
        with self._lock:  # prevent duplicate tick
            if Dice.throw(10) == 0:  # 1 in 10 chance
S
Steven Li 已提交
1280
                return self._lastTick + datetime.timedelta(0, -100)
S
Shuduo Sang 已提交
1281 1282 1283
            else:  # regular
                # add one second to it
                self._lastTick += datetime.timedelta(0, 1)
S
Steven Li 已提交
1284
                return self._lastTick
1285 1286

    def getNextInt(self):
1287 1288 1289
        with self._lock:
            self._lastInt += 1
            return self._lastInt
1290 1291

    def getNextBinary(self):
S
Shuduo Sang 已提交
1292 1293
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(
            self.getNextInt())
1294 1295 1296

    def getNextFloat(self):
        return 0.9 + self.getNextInt()
S
Shuduo Sang 已提交
1297

S
Steven Li 已提交
1298
    def getTableNameToDelete(self):
S
Shuduo Sang 已提交
1299 1300
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if (not tblNum):  # maybe false
1301
            return False
S
Shuduo Sang 已提交
1302

S
Steven Li 已提交
1303 1304
        return "table_{}".format(tblNum)

1305
    def cleanUp(self):
S
Shuduo Sang 已提交
1306 1307
        self._dbConn.close()

1308

1309
class TaskExecutor():
1310
    class BoundedList:
S
Shuduo Sang 已提交
1311
        def __init__(self, size=10):
1312 1313 1314
            self._size = size
            self._list = []

S
Shuduo Sang 已提交
1315 1316
        def add(self, n: int):
            if not self._list:  # empty
1317 1318 1319 1320 1321 1322 1323
                self._list.append(n)
                return
            # now we should insert
            nItems = len(self._list)
            insPos = 0
            for i in range(nItems):
                insPos = i
S
Shuduo Sang 已提交
1324 1325 1326
                if n <= self._list[i]:  # smaller than this item, time to insert
                    break  # found the insertion point
                insPos += 1  # insert to the right
1327

S
Shuduo Sang 已提交
1328 1329
            if insPos == 0:  # except for the 1st item, # TODO: elimiate first item as gating item
                return  # do nothing
1330 1331

            # print("Inserting at postion {}, value: {}".format(insPos, n))
S
Shuduo Sang 已提交
1332 1333
            self._list.insert(insPos, n)  # insert

1334
            newLen = len(self._list)
S
Shuduo Sang 已提交
1335 1336 1337 1338 1339
            if newLen <= self._size:
                return  # do nothing
            elif newLen == (self._size + 1):
                del self._list[0]  # remove the first item
            else:
1340 1341 1342 1343 1344 1345 1346
                raise RuntimeError("Corrupt Bounded List")

        def __str__(self):
            return repr(self._list)

    _boundedList = BoundedList()

1347 1348 1349
    def __init__(self, curStep):
        self._curStep = curStep

1350 1351 1352 1353
    @classmethod
    def getBoundedList(cls):
        return cls._boundedList

1354 1355 1356
    def getCurStep(self):
        return self._curStep

S
Shuduo Sang 已提交
1357
    def execute(self, task: Task, wt: WorkerThread):  # execute a task on a thread
1358
        task.execute(wt)
1359

1360 1361 1362 1363
    def recordDataMark(self, n: int):
        # print("[{}]".format(n), end="", flush=True)
        self._boundedList.add(n)

1364 1365
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
1366

1367 1368
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
1369

S
Shuduo Sang 已提交
1370

S
Steven Li 已提交
1371
class Task():
1372 1373 1374 1375
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Shuduo Sang 已提交
1376
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
S
Steven Li 已提交
1377 1378
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn
1379

S
Shuduo Sang 已提交
1380
    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
1381
        self._dbManager = dbManager
S
Shuduo Sang 已提交
1382
        self._workerThread = None
1383
        self._err = None
1384
        self._aborted = False
1385
        self._curStep = None
S
Shuduo Sang 已提交
1386
        self._numRows = None  # Number of rows affected
1387

S
Shuduo Sang 已提交
1388
        # Assign an incremental task serial number
1389
        self._taskNum = self.allocTaskNum()
S
Steven Li 已提交
1390
        # logger.debug("Creating new task {}...".format(self._taskNum))
1391

1392
        self._execStats = execStats
S
Shuduo Sang 已提交
1393
        self._lastSql = ""  # last SQL executed/attempted
1394

1395
    def isSuccess(self):
S
Shuduo Sang 已提交
1396
        return self._err is None
1397

1398 1399 1400
    def isAborted(self):
        return self._aborted

S
Shuduo Sang 已提交
1401
    def clone(self):  # TODO: why do we need this again?
1402
        newTask = self.__class__(self._dbManager, self._execStats)
1403 1404 1405
        return newTask

    def logDebug(self, msg):
S
Shuduo Sang 已提交
1406 1407 1408
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1409 1410

    def logInfo(self, msg):
S
Shuduo Sang 已提交
1411 1412 1413
        self._workerThread.logInfo(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1414

1415
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1416 1417 1418
        raise RuntimeError(
            "To be implemeted by child classes, class name: {}".format(
                self.__class__.__name__))
1419

1420 1421
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
S
Shuduo Sang 已提交
1422
        self._workerThread = wt  # type: ignore
1423 1424

        te = wt.getTaskExecutor()
1425
        self._curStep = te.getCurStep()
S
Shuduo Sang 已提交
1426 1427
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))
1428 1429

        self._err = None
S
Shuduo Sang 已提交
1430 1431
        self._execStats.beginTaskType(
            self.__class__.__name__)  # mark beginning
1432
        try:
S
Shuduo Sang 已提交
1433
            self._executeInternal(te, wt)  # TODO: no return value?
1434
        except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
1435 1436
            errno2 = err.errno if (
                err.errno > 0) else 0x80000000 + err.errno  # correct error scheme
1437 1438 1439 1440
            if (gConfig.continue_on_exception):  # user choose to continue
                self.logDebug(
                    "[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                        errno2, err, self._lastSql))
1441
                self._err = err
1442
            elif (errno2 in [
S
Shuduo Sang 已提交
1443
                0x05,  # TSDB_CODE_RPC_NOT_READY
1444 1445 1446 1447
                0x200, 0x360, 0x362, 0x36A, 0x36B, 0x36D,
                0x381, 0x380, 0x383,
                0x386,  # DB is being dropped?!
                0x503,
S
Shuduo Sang 已提交
1448
                0x510,  # vnode not in ready state
1449
                0x600,
S
Shuduo Sang 已提交
1450 1451 1452 1453 1454
                1000  # REST catch-all error
            ]):  # allowed errors
                self.logDebug(
                    "[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                        errno2, err, self._lastSql))
1455
                print("_", end="", flush=True)
S
Shuduo Sang 已提交
1456
                self._err = err
1457
            else:
S
Shuduo Sang 已提交
1458 1459
                errMsg = "[=] Unexpected Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                    errno2, err, self._lastSql)
1460
                self.logDebug(errMsg)
S
Shuduo Sang 已提交
1461
                if gConfig.debug:
1462 1463
                    # raise # so that we see full stack
                    traceback.print_exc()
1464 1465
                print(
                    "\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
1466 1467 1468 1469
                    "----------------------------\n")
                # sys.exit(-1)
                self._err = err
                self._aborted = True
S
Shuduo Sang 已提交
1470
        except Exception as e:
S
Steven Li 已提交
1471
            self.logInfo("Non-TAOS exception encountered")
S
Shuduo Sang 已提交
1472
            self._err = e
S
Steven Li 已提交
1473
            self._aborted = True
1474
            traceback.print_exc()
1475
        except BaseException as e:
1476
            self.logInfo("Python base exception encountered")
1477
            self._err = e
1478
            self._aborted = True
S
Steven Li 已提交
1479
            traceback.print_exc()
S
Shuduo Sang 已提交
1480 1481 1482 1483
        except BaseException:
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
                    self._lastSql))
1484
            raise
1485
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
S
Shuduo Sang 已提交
1486 1487 1488 1489 1490

        self.logDebug("[X] task execution completed, {}, status: {}".format(
            self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        # TODO: merge with above.
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess())
S
Steven Li 已提交
1491

1492
    def execSql(self, sql):
1493
        self._lastSql = sql
1494
        return self._dbManager.execute(sql)
1495

S
Shuduo Sang 已提交
1496
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1497 1498 1499
        self._lastSql = sql
        return wt.execSql(sql)

S
Shuduo Sang 已提交
1500
    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1501 1502 1503
        self._lastSql = sql
        return wt.querySql(sql)

S
Shuduo Sang 已提交
1504
    def getQueryResult(self, wt: WorkerThread):  # execute an SQL on the worker thread
1505 1506 1507
        return wt.getQueryResult()


1508
class ExecutionStats:
1509
    def __init__(self):
S
Shuduo Sang 已提交
1510 1511
        # total/success times for a task
        self._execTimes: Dict[str, [int, int]] = {}
1512 1513 1514
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1515
        self._execStartTime = None
S
Shuduo Sang 已提交
1516 1517
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time
1518

1519 1520 1521
        self._failed = False
        self._failureReason = None

S
Steven Li 已提交
1522
    def __str__(self):
S
Shuduo Sang 已提交
1523 1524
        return "[ExecStats: _failed={}, _failureReason={}".format(
            self._failed, self._failureReason)
S
Steven Li 已提交
1525 1526

    def isFailed(self):
S
Shuduo Sang 已提交
1527
        return self._failed
S
Steven Li 已提交
1528

1529 1530 1531 1532 1533 1534
    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

S
Shuduo Sang 已提交
1535
    def incExecCount(self, klassName, isSuccess):  # TODO: add a lock here
1536 1537
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
S
Shuduo Sang 已提交
1538 1539
        t = self._execTimes[klassName]  # tuple for the data
        t[0] += 1  # index 0 has the "total" execution times
1540
        if isSuccess:
S
Shuduo Sang 已提交
1541
            t[1] += 1  # index 1 has the "success" execution times
1542 1543 1544

    def beginTaskType(self, klassName):
        with self._lock:
S
Shuduo Sang 已提交
1545 1546
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
1547 1548 1549 1550 1551
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
S
Shuduo Sang 已提交
1552
            if self._tasksInProgress == 0:  # all tasks have stopped
1553 1554 1555
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1556 1557 1558 1559
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1560
    def printStats(self):
S
Shuduo Sang 已提交
1561 1562 1563 1564 1565 1566
        logger.info(
            "----------------------------------------------------------------------")
        logger.info(
            "| Crash_Gen test {}, with the following stats:". format(
                "FAILED (reason: {})".format(
                    self._failureReason) if self._failed else "SUCCEEDED"))
1567
        logger.info("| Task Execution Times (success/total):")
1568
        execTimesAny = 0
S
Shuduo Sang 已提交
1569
        for k, n in self._execTimes.items():
1570
            execTimesAny += n[0]
S
Shuduo Sang 已提交
1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590
            logger.info("|    {0:<24}: {1}/{2}".format(k, n[1], n[0]))

        logger.info(
            "| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info(
            "| Total Tasks In Progress at End: {}".format(
                self._tasksInProgress))
        logger.info(
            "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(
                self._accRunTime))
        logger.info(
            "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny))
        logger.info(
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
        logger.info(
            "| Top numbers written: {}".format(
                TaskExecutor.getBoundedList()))
        logger.info(
            "----------------------------------------------------------------------")
1591 1592 1593


class StateTransitionTask(Task):
1594 1595 1596 1597 1598
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1599
    @classmethod
S
Shuduo Sang 已提交
1600
    def getInfo(cls):  # each sub class should supply their own information
1601 1602
        raise RuntimeError("Overriding method expected")

S
Shuduo Sang 已提交
1603
    _endState = None
1604
    @classmethod
S
Shuduo Sang 已提交
1605
    def getEndState(cls):  # TODO: optimize by calling it fewer times
1606 1607
        raise RuntimeError("Overriding method expected")

1608 1609 1610
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1611

1612 1613 1614
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1615 1616

    @classmethod
1617 1618 1619
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1620

1621 1622 1623 1624
    @classmethod
    def getRegTableName(cls, i):
        return "db.reg_table_{}".format(i)

1625 1626
    def execute(self, wt: WorkerThread):
        super().execute(wt)
S
Shuduo Sang 已提交
1627 1628


1629
class TaskCreateDb(StateTransitionTask):
1630
    @classmethod
1631
    def getEndState(cls):
S
Shuduo Sang 已提交
1632
        return StateDbOnly()
1633

1634 1635 1636 1637
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1638
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1639 1640
        self.execWtSql(wt, "create database db")

1641

1642
class TaskDropDb(StateTransitionTask):
1643
    @classmethod
1644 1645
    def getEndState(cls):
        return StateEmpty()
1646

1647 1648 1649 1650
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1651
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1652
        self.execWtSql(wt, "drop database db")
S
Steven Li 已提交
1653
        logger.debug("[OPS] database dropped at {}".format(time.time()))
1654

S
Shuduo Sang 已提交
1655

1656
class TaskCreateSuperTable(StateTransitionTask):
1657
    @classmethod
1658 1659
    def getEndState(cls):
        return StateSuperTableOnly()
1660

1661 1662
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1663
        return state.canCreateFixedSuperTable()
1664

1665
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1666
        if not wt.dbInUse():  # no DB yet, to the best of our knowledge
1667 1668 1669
            logger.debug("Skipping task, no DB yet")
            return

S
Shuduo Sang 已提交
1670
        tblName = self._dbManager.getFixedSuperTableName()
1671
        # wt.execSql("use db")    # should always be in place
S
Shuduo Sang 已提交
1672 1673 1674 1675 1676
        self.execWtSql(
            wt,
            "create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
        # No need to create the regular tables, INSERT will do that
        # automatically
1677

S
Steven Li 已提交
1678

1679
class TaskReadData(StateTransitionTask):
1680
    @classmethod
1681
    def getEndState(cls):
S
Shuduo Sang 已提交
1682
        return None  # meaning doesn't affect state
1683

1684 1685 1686 1687
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1688
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1689 1690 1691
        sTbName = self._dbManager.getFixedSuperTableName()
        self.queryWtSql(wt, "select TBNAME from db.{}".format(
            sTbName))  # TODO: analyze result set later
1692

S
Shuduo Sang 已提交
1693 1694
        if random.randrange(
                5) == 0:  # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
1695 1696
            wt.getDbConn().close()
            wt.getDbConn().open()
1697
        else:
S
Shuduo Sang 已提交
1698 1699
            # wt.getDbConn().getQueryResult()
            rTables = self.getQueryResult(wt)
1700
            # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
S
Shuduo Sang 已提交
1701
            for rTbName in rTables:  # regular tables
1702
                self.execWtSql(wt, "select * from db.{}".format(rTbName[0]))
1703

1704 1705
        # tdSql.query(" cars where tbname in ('carzero', 'carone')")

S
Shuduo Sang 已提交
1706

1707
class TaskDropSuperTable(StateTransitionTask):
1708
    @classmethod
1709
    def getEndState(cls):
S
Shuduo Sang 已提交
1710
        return StateDbOnly()
1711

1712 1713
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1714
        return state.canDropFixedSuperTable()
1715

1716
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1717 1718 1719 1720 1721 1722 1723
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized
        # sequence
        if Dice.throw(2) == 0:
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
1724
            isSuccess = True
S
Shuduo Sang 已提交
1725 1726 1727
            for i in tblSeq:
                regTableName = self.getRegTableName(
                    i)  # "db.reg_table_{}".format(i)
1728
                try:
S
Shuduo Sang 已提交
1729 1730 1731 1732 1733 1734 1735
                    self.execWtSql(wt, "drop table {}".format(
                        regTableName))  # nRows always 0, like MySQL
                except taos.error.ProgrammingError as err:
                    # correcting for strange error number scheme
                    errno2 = err.errno if (
                        err.errno > 0) else 0x80000000 + err.errno
                    if (errno2 in [0x362]):  # mnode invalid table name
1736
                        isSuccess = False
S
Shuduo Sang 已提交
1737 1738 1739
                        logger.debug(
                            "[DB] Acceptable error when dropping a table")
                    continue  # try to delete next regular table
1740 1741

                if (not tickOutput):
S
Shuduo Sang 已提交
1742 1743
                    tickOutput = True  # Print only one time
                    if isSuccess:
1744 1745
                        print("d", end="", flush=True)
                    else:
S
Shuduo Sang 已提交
1746
                        print("f", end="", flush=True)
1747 1748

        # Drop the super table itself
S
Shuduo Sang 已提交
1749
        tblName = self._dbManager.getFixedSuperTableName()
1750
        self.execWtSql(wt, "drop table db.{}".format(tblName))
1751

S
Shuduo Sang 已提交
1752

1753 1754 1755
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
S
Shuduo Sang 已提交
1756
        return None  # meaning doesn't affect state
1757 1758 1759

    @classmethod
    def canBeginFrom(cls, state: AnyState):
S
Shuduo Sang 已提交
1760
        return state.canDropFixedSuperTable()  # if we can drop it, we can alter tags
1761 1762

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1763
        tblName = self._dbManager.getFixedSuperTableName()
1764
        dice = Dice.throw(4)
S
Shuduo Sang 已提交
1765
        if dice == 0:
1766
            sql = "alter table db.{} add tag extraTag int".format(tblName)
S
Shuduo Sang 已提交
1767
        elif dice == 1:
1768
            sql = "alter table db.{} drop tag extraTag".format(tblName)
S
Shuduo Sang 已提交
1769
        elif dice == 2:
1770
            sql = "alter table db.{} drop tag newTag".format(tblName)
S
Shuduo Sang 已提交
1771 1772 1773
        else:  # dice == 3
            sql = "alter table db.{} change tag extraTag newTag".format(
                tblName)
1774 1775

        self.execWtSql(wt, sql)
1776

S
Shuduo Sang 已提交
1777

1778
class TaskAddData(StateTransitionTask):
S
Shuduo Sang 已提交
1779 1780
    # Track which table is being actively worked on
    activeTable: Set[int] = set()
1781

S
Shuduo Sang 已提交
1782 1783
    # We use these two files to record operations to DB, useful for power-off
    # tests
1784 1785 1786 1787 1788
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
S
Shuduo Sang 已提交
1789 1790 1791 1792
        if gConfig.record_ops:
            if (cls.fAddLogReady is None):
                logger.info(
                    "Recording in a file operations to be performed...")
1793
                cls.fAddLogReady = open("add_log_ready.txt", "w")
S
Shuduo Sang 已提交
1794
            if (cls.fAddLogDone is None):
1795 1796
                logger.info("Recording in a file operations completed...")
                cls.fAddLogDone = open("add_log_done.txt", "w")
1797

1798
    @classmethod
1799 1800
    def getEndState(cls):
        return StateHasData()
1801 1802 1803 1804

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
S
Shuduo Sang 已提交
1805

1806
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1807
        ds = self._dbManager
S
Shuduo Sang 已提交
1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818
        # wt.execSql("use db") # TODO: seems to be an INSERT bug to require
        # this
        tblSeq = list(
            range(
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
        for i in tblSeq:
            if (i in self.activeTable):  # wow already active
                # logger.info("Concurrent data insertion into table: {}".format(i))
                # print("ct({})".format(i), end="", flush=True) # Concurrent
                # insertion into table
1819 1820
                print("x", end="", flush=True)
            else:
S
Shuduo Sang 已提交
1821 1822 1823 1824 1825 1826 1827 1828
                self.activeTable.add(i)  # marking it active
            # No need to shuffle data sequence, unless later we decide to do
            # non-increment insertion
            regTableName = self.getRegTableName(
                i)  # "db.reg_table_{}".format(i)
            for j in range(
                    self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
                nextInt = ds.getNextInt()
1829 1830
                if gConfig.record_ops:
                    self.prepToRecordOps()
S
Shuduo Sang 已提交
1831 1832 1833
                    self.fAddLogReady.write(
                        "Ready to write {} to {}\n".format(
                            nextInt, regTableName))
1834 1835 1836
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
                sql = "insert into {} using {} tags ('{}', {}) values ('{}', {});".format(
S
Shuduo Sang 已提交
1837 1838
                    regTableName,
                    ds.getFixedSuperTableName(),
1839
                    ds.getNextBinary(), ds.getNextFloat(),
1840
                    ds.getNextTick(), nextInt)
S
Shuduo Sang 已提交
1841 1842 1843
                self.execWtSql(wt, sql)
                # Successfully wrote the data into the DB, let's record it
                # somehow
1844
                te.recordDataMark(nextInt)
1845
                if gConfig.record_ops:
S
Shuduo Sang 已提交
1846 1847 1848
                    self.fAddLogDone.write(
                        "Wrote {} to {}\n".format(
                            nextInt, regTableName))
1849 1850
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
S
Shuduo Sang 已提交
1851
            self.activeTable.discard(i)  # not raising an error, unlike remove
1852 1853


S
Steven Li 已提交
1854 1855
# Deterministic random number generator
class Dice():
S
Shuduo Sang 已提交
1856
    seeded = False  # static, uninitialized
S
Steven Li 已提交
1857 1858

    @classmethod
S
Shuduo Sang 已提交
1859
    def seed(cls, s):  # static
S
Steven Li 已提交
1860
        if (cls.seeded):
S
Shuduo Sang 已提交
1861 1862
            raise RuntimeError(
                "Cannot seed the random generator more than once")
S
Steven Li 已提交
1863 1864 1865 1866 1867
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
S
Shuduo Sang 已提交
1868
    def verifyRNG(cls):  # Verify that the RNG is determinstic
S
Steven Li 已提交
1869 1870 1871 1872
        random.seed(0)
        x1 = random.randrange(0, 1000)
        x2 = random.randrange(0, 1000)
        x3 = random.randrange(0, 1000)
S
Shuduo Sang 已提交
1873
        if (x1 != 864 or x2 != 394 or x3 != 776):
S
Steven Li 已提交
1874 1875 1876
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
S
Shuduo Sang 已提交
1877
    def throw(cls, stop):  # get 0 to stop-1
1878
        return cls.throwRange(0, stop)
S
Steven Li 已提交
1879 1880

    @classmethod
S
Shuduo Sang 已提交
1881 1882
    def throwRange(cls, start, stop):  # up to stop-1
        if (not cls.seeded):
S
Steven Li 已提交
1883
            raise RuntimeError("Cannot throw dice before seeding it")
1884
        return random.randrange(start, stop)
S
Steven Li 已提交
1885 1886


S
Steven Li 已提交
1887 1888
class LoggingFilter(logging.Filter):
    def filter(self, record: logging.LogRecord):
S
Shuduo Sang 已提交
1889 1890
        if (record.levelno >= logging.INFO):
            return True  # info or above always log
S
Steven Li 已提交
1891

S
Steven Li 已提交
1892 1893 1894 1895
        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
S
Steven Li 已提交
1896 1897
        return True

S
Shuduo Sang 已提交
1898 1899

class MyLoggingAdapter(logging.LoggerAdapter):
1900 1901 1902 1903
    def process(self, msg, kwargs):
        return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
        # return '[%s] %s' % (self.extra['connid'], msg), kwargs

S
Shuduo Sang 已提交
1904 1905

class SvcManager:
1906
    def __init__(self):
1907
        print("Starting TDengine Service Manager")
1908 1909
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT, self.sigIntHandler)
1910
        signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
1911

1912
        self.inSigHandler = False
1913 1914
        # self._status = MainExec.STATUS_RUNNING # set inside
        # _startTaosService()
1915
        self.svcMgrThread = None
1916

1917 1918 1919 1920 1921 1922 1923 1924
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Service Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Restart")
            # Remember to update the if range below
S
Shuduo Sang 已提交
1925
            # print("Enter Choice: ", end="", flush=True)
1926 1927 1928
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
S
Shuduo Sang 已提交
1929 1930 1931
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
1932
            print("Invalid choice, please try again.")
S
Shuduo Sang 已提交
1933
            choice = ""  # reset
1934 1935
        return choice

S
Shuduo Sang 已提交
1936
    def sigUsrHandler(self, signalNumber, frame):
1937
        print("Interrupting main thread execution upon SIGUSR1")
1938
        if self.inSigHandler:  # already
1939
            print("Ignoring repeated SIG...")
S
Shuduo Sang 已提交
1940
            return  # do nothing if it's already not running
1941
        self.inSigHandler = True
1942 1943

        choice = self._doMenu()
S
Shuduo Sang 已提交
1944 1945 1946 1947 1948
        if choice == "1":
            # TODO: can the sub-process be blocked due to us not reading from
            # queue?
            self.sigHandlerResume()
        elif choice == "2":
1949
            self.stopTaosService()
S
Shuduo Sang 已提交
1950
        elif choice == "3":
1951 1952
            self.stopTaosService()
            self.startTaosService()
1953 1954
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))
1955

1956 1957
        self.inSigHandler = False

1958 1959
    def sigIntHandler(self, signalNumber, frame):
        print("Sig INT Handler starting...")
1960
        if self.inSigHandler:
1961 1962
            print("Ignoring repeated SIG_INT...")
            return
1963
        self.inSigHandler = True
1964

S
Shuduo Sang 已提交
1965 1966
        self.stopTaosService()
        print("INT signal handler returning...")
1967
        self.inSigHandler = False
1968

S
Shuduo Sang 已提交
1969
    def sigHandlerResume(self):
1970
        print("Resuming TDengine service manager thread (main thread)...\n\n")
1971

1972
    def _checkServiceManagerThread(self):
1973 1974 1975 1976
        if self.svcMgrThread:  # valid svc mgr thread
            if self.svcMgrThread.isStopped():  # done?
                self.svcMgrThread.procIpcBatch()  # one last time. TODO: appropriate?
                self.svcMgrThread = None  # no more
1977 1978

    def _procIpcAll(self):
1979 1980 1981 1982 1983 1984 1985 1986
        while self.svcMgrThread:  # for as long as the svc mgr thread is still here
            self.svcMgrThread.procIpcBatch()  # regular processing,
            time.sleep(0.5)  # pause, before next round
            self._checkServiceManagerThread()
        print(
            "Service Manager Thread (with subprocess) has ended, main thread now exiting...")

    def startTaosService(self):
1987
        if self.svcMgrThread:
1988 1989 1990
            raise RuntimeError(
                "Cannot start TAOS service when one may already be running")
        self.svcMgrThread = ServiceManagerThread()  # create the object
1991 1992
        self.svcMgrThread.start()
        print("TAOS service started, printing out output...")
1993 1994 1995
        self.svcMgrThread.procIpcBatch(
            trimToTarget=10,
            forceOutput=True)  # for printing 10 lines
1996
        print("TAOS service started")
1997 1998

    def stopTaosService(self, outputLines=20):
1999 2000 2001 2002 2003
        print("Terminating Service Manager Thread (SMT) execution...")
        if not self.svcMgrThread:
            raise RuntimeError("Unexpected empty svc mgr thread")
        self.svcMgrThread.stop()
        if self.svcMgrThread.isStopped():
2004 2005
            self.svcMgrThread.procIpcBatch(outputLines)  # one last time
            self.svcMgrThread = None
2006 2007 2008 2009 2010 2011 2012
            print("----- End of TDengine Service Output -----\n")
            print("SMT execution terminated")
        else:
            print("WARNING: SMT did not terminate as expected")

    def run(self):
        self.startTaosService()
2013 2014 2015 2016 2017
        self._procIpcAll()  # pump/process all the messages
        if self.svcMgrThread:  # if sig handler hasn't destroyed it by now
            self.stopTaosService()  # should have started already


2018 2019 2020 2021 2022
class ServiceManagerThread:
    MAX_QUEUE_SIZE = 10000

    def __init__(self):
        self._tdeSubProcess = None
2023
        self._thread = None
2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040
        self._status = None

    def getStatus(self):
        return self._status

    def isRunning(self):
        # return self._thread and self._thread.is_alive()
        return self._status == MainExec.STATUS_RUNNING

    def isStopping(self):
        return self._status == MainExec.STATUS_STOPPING

    def isStopped(self):
        return self._status == MainExec.STATUS_STOPPED

    # Start the thread (with sub process), and wait for the sub service
    # to become fully operational
2041 2042
    def start(self):
        if self._thread:
2043
            raise RuntimeError("Unexpected _thread")
2044
        if self._tdeSubProcess:
2045 2046 2047 2048
            raise RuntimeError("TDengine sub process already created/running")

        self._status = MainExec.STATUS_STARTING

2049
        self._tdeSubProcess = TdeSubProcess()
2050 2051 2052 2053
        self._tdeSubProcess.start()

        self._ipcQueue = Queue()
        self._thread = threading.Thread(
2054
            target=self.svcOutputReader,
2055
            args=(self._tdeSubProcess.getStdOut(), self._ipcQueue))
2056
        self._thread.daemon = True  # thread dies with the program
2057 2058 2059
        self._thread.start()

        # wait for service to start
2060
        for i in range(0, 10):
2061 2062 2063
            time.sleep(1.0)
            # self.procIpcBatch() # don't pump message during start up
            print("_zz_", end="", flush=True)
2064
            if self._status == MainExec.STATUS_RUNNING:
2065
                logger.info("[] TDengine service READY to process requests")
2066 2067 2068
                return  # now we've started
        # TODO: handle this better?
        raise RuntimeError("TDengine service did not start successfully")
2069 2070 2071 2072 2073 2074

    def stop(self):
        # can be called from both main thread or signal handler
        print("Terminating TDengine service running as the sub process...")
        if self.isStopped():
            print("Service already stopped")
2075
            return
2076 2077 2078
        if self.isStopping():
            print("Service is already being stopped")
            return
2079 2080 2081 2082
        # Linux will send Control-C generated SIGINT to the TDengine process
        # already, ref:
        # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes
        if not self._tdeSubProcess:
2083
            raise RuntimeError("sub process object missing")
2084

2085 2086 2087
        self._status = MainExec.STATUS_STOPPING
        self._tdeSubProcess.stop()

2088 2089 2090 2091
        if self._tdeSubProcess.isRunning():  # still running
            print(
                "FAILED to stop sub process, it is still running... pid = {}".format(
                    self.subProcess.pid))
2092
        else:
2093 2094 2095
            self._tdeSubProcess = None  # not running any more
            self.join()  # stop the thread, change the status, etc.

2096 2097 2098
    def join(self):
        # TODO: sanity check
        if not self.isStopping():
2099 2100 2101
            raise RuntimeError(
                "Unexpected status when ending svc mgr thread: {}".format(
                    self._status))
2102

2103
        if self._thread:
2104
            self._thread.join()
2105
            self._thread = None
2106
            self._status = MainExec.STATUS_STOPPED
S
Shuduo Sang 已提交
2107
        else:
2108
            print("Joining empty thread, doing nothing")
2109 2110 2111

    def _trimQueue(self, targetSize):
        if targetSize <= 0:
2112
            return  # do nothing
2113
        q = self._ipcQueue
2114
        if (q.qsize() <= targetSize):  # no need to trim
2115 2116 2117 2118
            return

        logger.debug("Triming IPC queue to target size: {}".format(targetSize))
        itemsToTrim = q.qsize() - targetSize
2119
        for i in range(0, itemsToTrim):
2120 2121 2122
            try:
                q.get_nowait()
            except Empty:
2123 2124
                break  # break out of for loop, no more trimming

2125
    TD_READY_MSG = "TDengine is initialized successfully"
S
Shuduo Sang 已提交
2126

2127 2128
    def procIpcBatch(self, trimToTarget=0, forceOutput=False):
        self._trimQueue(trimToTarget)  # trim if necessary
S
Shuduo Sang 已提交
2129 2130
        # Process all the output generated by the underlying sub process,
        # managed by IO thread
2131
        print("<", end="", flush=True)
S
Shuduo Sang 已提交
2132 2133
        while True:
            try:
2134
                line = self._ipcQueue.get_nowait()  # getting output at fast speed
2135
                self._printProgress("_o")
2136 2137 2138
            except Empty:
                # time.sleep(2.3) # wait only if there's no output
                # no more output
2139
                print(".>", end="", flush=True)
S
Shuduo Sang 已提交
2140
                return  # we are done with THIS BATCH
2141
            else:  # got line, printing out
2142 2143 2144 2145 2146 2147 2148
                if forceOutput:
                    logger.info(line)
                else:
                    logger.debug(line)
        print(">", end="", flush=True)

    _ProgressBars = ["--", "//", "||", "\\\\"]
2149

2150 2151
    def _printProgress(self, msg):  # TODO: assuming 2 chars
        print(msg, end="", flush=True)
2152 2153 2154
        pBar = self._ProgressBars[Dice.throw(4)]
        print(pBar, end="", flush=True)
        print('\b\b\b\b', end="", flush=True)
2155

2156 2157 2158
    def svcOutputReader(self, out: IO, queue):
        # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # print("This is the svcOutput Reader...")
2159
        # for line in out :
2160 2161 2162 2163
        for line in iter(out.readline, b''):
            # print("Finished reading a line: {}".format(line))
            # print("Adding item to queue...")
            line = line.decode("utf-8").rstrip()
2164 2165
            # This might block, and then causing "out" buffer to block
            queue.put(line)
2166 2167
            self._printProgress("_i")

2168 2169 2170
            if self._status == MainExec.STATUS_STARTING:  # we are starting, let's see if we have started
                if line.find(self.TD_READY_MSG) != -1:  # found
                    self._status = MainExec.STATUS_RUNNING
2171 2172

            # Trim the queue if necessary: TODO: try this 1 out of 10 times
2173
            self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10)  # trim to 90% size
2174

2175 2176 2177
            if self.isStopping():  # TODO: use thread status instead
                # WAITING for stopping sub process to finish its outptu
                print("_w", end="", flush=True)
2178 2179

            # queue.put(line)
2180 2181
        # meaning sub process must have died
        print("\nNo more output from IO thread managing TDengine service")
2182 2183
        out.close()

2184 2185

class TdeSubProcess:
2186 2187 2188 2189 2190
    def __init__(self):
        self.subProcess = None

    def getStdOut(self):
        return self.subProcess.stdout
2191

2192
    def isRunning(self):
2193
        return self.subProcess is not None
2194

S
Shuduo Sang 已提交
2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath
2209

2210
    def start(self):
2211
        ON_POSIX = 'posix' in sys.builtin_module_names
S
Shuduo Sang 已提交
2212

2213 2214 2215
        taosdPath = self.getBuildPath() + "/build/bin/taosd"
        cfgPath = self.getBuildPath() + "/test/cfg"

S
Shuduo Sang 已提交
2216
        svcCmd = [taosdPath, '-c', cfgPath]
2217
        # svcCmd = ['vmstat', '1']
S
Shuduo Sang 已提交
2218
        if self.subProcess:  # already there
2219 2220 2221
            raise RuntimeError("Corrupt process state")

        self.subProcess = subprocess.Popen(
S
Shuduo Sang 已提交
2222 2223
            svcCmd,
            stdout=subprocess.PIPE,
2224
            # bufsize=1, # not supported in binary mode
S
Shuduo Sang 已提交
2225
            close_fds=ON_POSIX)  # had text=True, which interferred with reading EOF
2226

2227 2228 2229
    def stop(self):
        if not self.subProcess:
            print("Sub process already stopped")
2230 2231 2232
            return

        retCode = self.subProcess.poll()
S
Shuduo Sang 已提交
2233
        if retCode:  # valid return code, process ended
2234
            self.subProcess = None
S
Shuduo Sang 已提交
2235 2236
        else:  # process still alive, let's interrupt it
            print(
2237
                "Sub process is running, sending SIG_INT and waiting for it to terminate...")
S
Shuduo Sang 已提交
2238 2239 2240 2241
            # sub process should end, then IPC queue should end, causing IO
            # thread to end
            self.subProcess.send_signal(signal.SIGINT)
            try:
2242 2243 2244 2245
                self.subProcess.wait(10)
            except subprocess.TimeoutExpired as err:
                print("Time out waiting for TDengine service process to exit")
            else:
2246
                print("TDengine service process terminated successfully from SIG_INT")
2247 2248
                self.subProcess = None

S
Shuduo Sang 已提交
2249

2250 2251 2252 2253 2254 2255
class ClientManager:
    def __init__(self):
        print("Starting service manager")
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT, self.sigIntHandler)

2256
        self._status = MainExec.STATUS_RUNNING
2257 2258 2259
        self.tc = None

    def sigIntHandler(self, signalNumber, frame):
2260
        if self._status != MainExec.STATUS_RUNNING:
2261
            print("Ignoring repeated SIGINT...")
S
Shuduo Sang 已提交
2262
            return  # do nothing if it's already not running
2263
        self._status = MainExec.STATUS_STOPPING  # immediately set our status
2264 2265 2266 2267

        print("Terminating program...")
        self.tc.requestToStop()

S
Shuduo Sang 已提交
2268
    def _printLastNumbers(self):  # to verify data durability
2269 2270
        dbManager = DbManager(resetDb=False)
        dbc = dbManager.getDbConn()
S
Shuduo Sang 已提交
2271
        if dbc.query("show databases") == 0:  # no databae
2272
            return
S
Shuduo Sang 已提交
2273
        if dbc.query("show tables") == 0:  # no tables
S
Steven Li 已提交
2274
            return
2275 2276

        dbc.execute("use db")
S
Shuduo Sang 已提交
2277
        sTbName = dbManager.getFixedSuperTableName()
2278 2279

        # get all regular tables
S
Shuduo Sang 已提交
2280 2281
        # TODO: analyze result set later
        dbc.query("select TBNAME from db.{}".format(sTbName))
2282 2283 2284
        rTables = dbc.getQueryResult()

        bList = TaskExecutor.BoundedList()
S
Shuduo Sang 已提交
2285
        for rTbName in rTables:  # regular tables
2286 2287
            dbc.query("select speed from db.{}".format(rTbName[0]))
            numbers = dbc.getQueryResult()
S
Shuduo Sang 已提交
2288
            for row in numbers:
2289 2290 2291 2292 2293 2294
                # print("<{}>".format(n), end="", flush=True)
                bList.add(row[0])

        print("Top numbers in DB right now: {}".format(bList))
        print("TDengine client execution is about to start in 2 seconds...")
        time.sleep(2.0)
S
Shuduo Sang 已提交
2295
        dbManager = None  # release?
2296 2297 2298 2299 2300

    def prepare(self):
        self._printLastNumbers()

    def run(self):
S
Shuduo Sang 已提交
2301
        if gConfig.auto_start_service:
2302 2303 2304
            svcMgr = SvcManager()
            svcMgr.startTaosService()

2305 2306
        self._printLastNumbers()

S
Shuduo Sang 已提交
2307
        dbManager = DbManager()  # Regular function
2308
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
2309
        self.tc = ThreadCoordinator(thPool, dbManager)
S
Shuduo Sang 已提交
2310

2311
        self.tc.run()
S
Steven Li 已提交
2312 2313
        # print("exec stats: {}".format(self.tc.getExecStats()))
        # print("TC failed = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2314
        if gConfig.auto_start_service:
2315
            svcMgr.stopTaosService()
2316 2317
        # Print exec status, etc., AFTER showing messages from the server
        self.conclude()
S
Steven Li 已提交
2318
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2319 2320
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
        return 1 if self.tc.isFailed() else 0
2321 2322 2323

    def conclude(self):
        self.tc.printStats()
S
Shuduo Sang 已提交
2324
        self.tc.getDbManager().cleanUp()
2325 2326 2327


class MainExec:
2328 2329 2330
    STATUS_STARTING = 1
    STATUS_RUNNING = 2
    STATUS_STOPPING = 3
2331
    STATUS_STOPPED = 4
2332 2333 2334 2335

    @classmethod
    def runClient(cls):
        clientManager = ClientManager()
S
Steven Li 已提交
2336
        return clientManager.run()
2337 2338 2339

    @classmethod
    def runService(cls):
2340 2341
        svcManager = SvcManager()
        svcManager.run()
2342 2343

    @classmethod
S
Shuduo Sang 已提交
2344
    def runTemp(cls):  # for debugging purposes
2345 2346
        # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
S
Shuduo Sang 已提交
2347
        # sTbName = dbState.getFixedSuperTableName()
2348 2349
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
S
Shuduo Sang 已提交
2350
        #     dbc.execute("use db")
2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))
S
Shuduo Sang 已提交
2362

2363 2364 2365 2366 2367 2368 2369 2370
        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
S
Shuduo Sang 已提交
2371
        #                 rTbName[0],
2372 2373
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
S
Shuduo Sang 已提交
2374
        #         for rTbName in rTables : # regular tables
2375
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
S
Shuduo Sang 已提交
2376
        #         logger.info("Initial READING operation is successful")
2377
        #     except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
2378 2379
        #         logger.info("Initial WRITE/READ error: {}".format(err))

2380 2381 2382
        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
S
Shuduo Sang 已提交
2383
        #     rows = dbc.query("show databases")
2384
        #     print("Rows: {}, time={}".format(rows, time.time()))
S
Shuduo Sang 已提交
2385 2386
        return

S
Steven Li 已提交
2387

2388
def main():
S
Shuduo Sang 已提交
2389 2390
    # Super cool Python argument library:
    # https://docs.python.org/3/library/argparse.html
2391 2392 2393 2394 2395 2396 2397 2398 2399
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
2400

2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421
    # parser.add_argument('-a', '--auto-start-service', action='store_true',                        
    #                     help='Automatically start/stop the TDengine service (default: false)')
    # parser.add_argument('-c', '--connector-type', action='store', default='native', type=str,
    #                     help='Connector type to use: native, rest, or mixed (default: 10)')
    # parser.add_argument('-d', '--debug', action='store_true',                        
    #                     help='Turn on DEBUG mode for more logging (default: false)')
    # parser.add_argument('-e', '--run-tdengine', action='store_true',                        
    #                     help='Run TDengine service in foreground (default: false)')
    # parser.add_argument('-l', '--larger-data', action='store_true',                        
    #                     help='Write larger amount of data during write operations (default: false)')
    # parser.add_argument('-p', '--per-thread-db-connection', action='store_true',                        
    #                     help='Use a single shared db connection (default: false)')
    # parser.add_argument('-r', '--record-ops', action='store_true',                        
    #                     help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')                    
    # parser.add_argument('-s', '--max-steps', action='store', default=1000, type=int,
    #                     help='Maximum number of steps to run (default: 100)')
    # parser.add_argument('-t', '--num-threads', action='store', default=5, type=int,
    #                     help='Number of threads to run (default: 10)')
    # parser.add_argument('-x', '--continue-on-exception', action='store_true',                        
    #                     help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')                        

S
Shuduo Sang 已提交
2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472
    parser.add_argument(
        '-a',
        '--auto-start-service',
        action='store_true',
        help='Automatically start/stop the TDengine service (default: false)')
    parser.add_argument(
        '-c',
        '--connector-type',
        action='store',
        default='native',
        type=str,
        help='Connector type to use: native, rest, or mixed (default: 10)')
    parser.add_argument(
        '-d',
        '--debug',
        action='store_true',
        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument(
        '-e',
        '--run-tdengine',
        action='store_true',
        help='Run TDengine service in foreground (default: false)')
    parser.add_argument(
        '-l',
        '--larger-data',
        action='store_true',
        help='Write larger amount of data during write operations (default: false)')
    parser.add_argument(
        '-p',
        '--per-thread-db-connection',
        action='store_true',
        help='Use a single shared db connection (default: false)')
    parser.add_argument(
        '-r',
        '--record-ops',
        action='store_true',
        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
    parser.add_argument(
        '-s',
        '--max-steps',
        action='store',
        default=1000,
        type=int,
        help='Maximum number of steps to run (default: 100)')
    parser.add_argument(
        '-t',
        '--num-threads',
        action='store',
        default=5,
        type=int,
        help='Number of threads to run (default: 10)')
2473 2474 2475 2476 2477
    parser.add_argument(
        '-x',
        '--continue-on-exception',
        action='store_true',
        help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')
2478

2479
    global gConfig
2480
    gConfig = parser.parse_args()
2481

2482
    # Logging Stuff
2483
    global logger
S
Shuduo Sang 已提交
2484 2485
    _logger = logging.getLogger('CrashGen')  # real logger
    _logger.addFilter(LoggingFilter())
2486 2487 2488
    ch = logging.StreamHandler()
    _logger.addHandler(ch)

S
Shuduo Sang 已提交
2489 2490
    # Logging adapter, to be used as a logger
    logger = MyLoggingAdapter(_logger, [])
2491

S
Shuduo Sang 已提交
2492 2493
    if (gConfig.debug):
        logger.setLevel(logging.DEBUG)  # default seems to be INFO
S
Steven Li 已提交
2494 2495
    else:
        logger.setLevel(logging.INFO)
S
Shuduo Sang 已提交
2496

2497 2498
    Dice.seed(0)  # initial seeding of dice

2499
    # Run server or client
S
Shuduo Sang 已提交
2500
    if gConfig.run_tdengine:  # run server
2501
        MainExec.runService()
S
Shuduo Sang 已提交
2502
    else:
S
Steven Li 已提交
2503
        return MainExec.runClient()
2504

S
Shuduo Sang 已提交
2505

2506
if __name__ == "__main__":
S
Steven Li 已提交
2507 2508 2509
    exitCode = main()
    # print("Exiting with code: {}".format(exitCode))
    sys.exit(exitCode)